; NOTE(review): Autogenerated test chunk — the assertion lines below were produced
; by utils/update_llc_test_checks.py (per the file's own NOTE) and must match llc
; output byte-for-byte. Do not hand-edit assertion lines; regenerate with the tool.
; Each function computes llvm.ctpop.v16i8 and compares it unsigned (ugt/ult)
; against a small per-byte constant (1..4), sign-extending the i1 mask to i8.
; Prefix coverage per the RUN lines: baseline SSE2 through SSE4.1, AVX1/AVX2,
; AVX512 VPOPCNTDQ with and without VL, and AVX512 BITALG with and without VL.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQ 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQVL 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=BITALG_NOVLX 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=BITALG 12 13 14define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { 15; SSE-LABEL: ugt_1_v16i8: 16; SSE: # %bb.0: 17; SSE-NEXT: pcmpeqd %xmm2, %xmm2 18; SSE-NEXT: movdqa %xmm0, %xmm1 19; SSE-NEXT: paddb %xmm2, %xmm1 20; SSE-NEXT: pand %xmm0, %xmm1 21; SSE-NEXT: pxor %xmm0, %xmm0 22; SSE-NEXT: pcmpeqb %xmm0, %xmm1 23; SSE-NEXT: pxor %xmm2, %xmm1 24; SSE-NEXT: movdqa %xmm1, %xmm0 25; SSE-NEXT: retq 26; 27; AVX1-LABEL: ugt_1_v16i8: 28; AVX1: # %bb.0: 29; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 30; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm2 31; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 32; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 33; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 34; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 35; AVX1-NEXT: retq 36; 37; AVX2-LABEL: ugt_1_v16i8: 38; AVX2: # %bb.0: 39; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 40; 
AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm2 41; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 42; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 43; AVX2-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 44; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 45; AVX2-NEXT: retq 46; 47; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i8: 48; AVX512VPOPCNTDQ: # %bb.0: 49; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 50; AVX512VPOPCNTDQ-NEXT: vpaddb %xmm1, %xmm0, %xmm1 51; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 52; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 53; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 54; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 55; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 56; AVX512VPOPCNTDQ-NEXT: vzeroupper 57; AVX512VPOPCNTDQ-NEXT: retq 58; 59; AVX512VPOPCNTDQVL-LABEL: ugt_1_v16i8: 60; AVX512VPOPCNTDQVL: # %bb.0: 61; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 62; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm1, %xmm0, %xmm1 63; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0 64; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 65; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 66; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 67; AVX512VPOPCNTDQVL-NEXT: retq 68; 69; BITALG_NOVLX-LABEL: ugt_1_v16i8: 70; BITALG_NOVLX: # %bb.0: 71; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 72; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 73; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 74; BITALG_NOVLX-NEXT: vzeroupper 75; BITALG_NOVLX-NEXT: retq 76; 77; BITALG-LABEL: ugt_1_v16i8: 78; BITALG: # %bb.0: 79; BITALG-NEXT: vpopcntb %xmm0, %xmm0 80; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 81; BITALG-NEXT: vpmovm2b %k0, %xmm0 82; BITALG-NEXT: retq 83 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 84 %3 = icmp ugt <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 85 %4 = sext <16 x i1> %3 to <16 x i8> 86 ret <16 x i8> %4 87} 88 89define <16 x i8> 
@ult_2_v16i8(<16 x i8> %0) { 90; SSE-LABEL: ult_2_v16i8: 91; SSE: # %bb.0: 92; SSE-NEXT: pcmpeqd %xmm1, %xmm1 93; SSE-NEXT: paddb %xmm0, %xmm1 94; SSE-NEXT: pand %xmm1, %xmm0 95; SSE-NEXT: pxor %xmm1, %xmm1 96; SSE-NEXT: pcmpeqb %xmm1, %xmm0 97; SSE-NEXT: retq 98; 99; AVX-LABEL: ult_2_v16i8: 100; AVX: # %bb.0: 101; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 102; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm1 103; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 104; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 105; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 106; AVX-NEXT: retq 107; 108; BITALG_NOVLX-LABEL: ult_2_v16i8: 109; BITALG_NOVLX: # %bb.0: 110; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 111; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 112; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 113; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 114; BITALG_NOVLX-NEXT: vzeroupper 115; BITALG_NOVLX-NEXT: retq 116; 117; BITALG-LABEL: ult_2_v16i8: 118; BITALG: # %bb.0: 119; BITALG-NEXT: vpopcntb %xmm0, %xmm0 120; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 121; BITALG-NEXT: vpmovm2b %k0, %xmm0 122; BITALG-NEXT: retq 123 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 124 %3 = icmp ult <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> 125 %4 = sext <16 x i1> %3 to <16 x i8> 126 ret <16 x i8> %4 127} 128 129define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) { 130; SSE2-LABEL: ugt_2_v16i8: 131; SSE2: # %bb.0: 132; SSE2-NEXT: movdqa %xmm0, %xmm1 133; SSE2-NEXT: psrlw $1, %xmm1 134; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 135; SSE2-NEXT: psubb %xmm1, %xmm0 136; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 137; SSE2-NEXT: movdqa %xmm0, %xmm2 138; SSE2-NEXT: pand %xmm1, %xmm2 139; SSE2-NEXT: psrlw $2, %xmm0 140; SSE2-NEXT: pand %xmm1, %xmm0 141; SSE2-NEXT: paddb %xmm2, %xmm0 142; SSE2-NEXT: movdqa %xmm0, %xmm1 143; SSE2-NEXT: psrlw $4, %xmm1 144; SSE2-NEXT: paddb %xmm0, 
%xmm1 145; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 146; SSE2-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 147; SSE2-NEXT: movdqa %xmm1, %xmm0 148; SSE2-NEXT: retq 149; 150; SSE3-LABEL: ugt_2_v16i8: 151; SSE3: # %bb.0: 152; SSE3-NEXT: movdqa %xmm0, %xmm1 153; SSE3-NEXT: psrlw $1, %xmm1 154; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 155; SSE3-NEXT: psubb %xmm1, %xmm0 156; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 157; SSE3-NEXT: movdqa %xmm0, %xmm2 158; SSE3-NEXT: pand %xmm1, %xmm2 159; SSE3-NEXT: psrlw $2, %xmm0 160; SSE3-NEXT: pand %xmm1, %xmm0 161; SSE3-NEXT: paddb %xmm2, %xmm0 162; SSE3-NEXT: movdqa %xmm0, %xmm1 163; SSE3-NEXT: psrlw $4, %xmm1 164; SSE3-NEXT: paddb %xmm0, %xmm1 165; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 166; SSE3-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 167; SSE3-NEXT: movdqa %xmm1, %xmm0 168; SSE3-NEXT: retq 169; 170; SSSE3-LABEL: ugt_2_v16i8: 171; SSSE3: # %bb.0: 172; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 173; SSSE3-NEXT: movdqa %xmm0, %xmm2 174; SSSE3-NEXT: pand %xmm1, %xmm2 175; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 176; SSSE3-NEXT: movdqa %xmm3, %xmm4 177; SSSE3-NEXT: pshufb %xmm2, %xmm4 178; SSSE3-NEXT: psrlw $4, %xmm0 179; SSSE3-NEXT: pand %xmm1, %xmm0 180; SSSE3-NEXT: pshufb %xmm0, %xmm3 181; SSSE3-NEXT: paddb %xmm4, %xmm3 182; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 183; SSSE3-NEXT: pmaxub %xmm3, %xmm0 184; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 185; SSSE3-NEXT: retq 186; 187; SSE41-LABEL: ugt_2_v16i8: 188; SSE41: # %bb.0: 189; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 190; SSE41-NEXT: movdqa %xmm0, %xmm2 191; SSE41-NEXT: pand %xmm1, %xmm2 192; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 193; SSE41-NEXT: movdqa %xmm3, %xmm4 194; SSE41-NEXT: pshufb %xmm2, %xmm4 195; SSE41-NEXT: psrlw $4, %xmm0 196; SSE41-NEXT: pand %xmm1, %xmm0 197; SSE41-NEXT: pshufb %xmm0, 
%xmm3 198; SSE41-NEXT: paddb %xmm4, %xmm3 199; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 200; SSE41-NEXT: pmaxub %xmm3, %xmm0 201; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 202; SSE41-NEXT: retq 203; 204; AVX1-LABEL: ugt_2_v16i8: 205; AVX1: # %bb.0: 206; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 207; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 208; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 209; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 210; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 211; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 212; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 213; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 214; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 215; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 216; AVX1-NEXT: retq 217; 218; AVX2-LABEL: ugt_2_v16i8: 219; AVX2: # %bb.0: 220; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 221; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 222; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 223; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 224; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 225; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 226; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 227; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 228; AVX2-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 229; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 230; AVX2-NEXT: retq 231; 232; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i8: 233; AVX512VPOPCNTDQ: # %bb.0: 234; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 235; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 236; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 
237; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 238; AVX512VPOPCNTDQ-NEXT: vzeroupper 239; AVX512VPOPCNTDQ-NEXT: retq 240; 241; AVX512VPOPCNTDQVL-LABEL: ugt_2_v16i8: 242; AVX512VPOPCNTDQVL: # %bb.0: 243; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 244; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 245; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 246; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 247; AVX512VPOPCNTDQVL-NEXT: vzeroupper 248; AVX512VPOPCNTDQVL-NEXT: retq 249; 250; BITALG_NOVLX-LABEL: ugt_2_v16i8: 251; BITALG_NOVLX: # %bb.0: 252; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 253; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 254; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 255; BITALG_NOVLX-NEXT: vzeroupper 256; BITALG_NOVLX-NEXT: retq 257; 258; BITALG-LABEL: ugt_2_v16i8: 259; BITALG: # %bb.0: 260; BITALG-NEXT: vpopcntb %xmm0, %xmm0 261; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 262; BITALG-NEXT: vpmovm2b %k0, %xmm0 263; BITALG-NEXT: retq 264 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 265 %3 = icmp ugt <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> 266 %4 = sext <16 x i1> %3 to <16 x i8> 267 ret <16 x i8> %4 268} 269 270define <16 x i8> @ult_3_v16i8(<16 x i8> %0) { 271; SSE2-LABEL: ult_3_v16i8: 272; SSE2: # %bb.0: 273; SSE2-NEXT: movdqa %xmm0, %xmm1 274; SSE2-NEXT: psrlw $1, %xmm1 275; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 276; SSE2-NEXT: psubb %xmm1, %xmm0 277; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 278; 
SSE2-NEXT: movdqa %xmm0, %xmm2 279; SSE2-NEXT: pand %xmm1, %xmm2 280; SSE2-NEXT: psrlw $2, %xmm0 281; SSE2-NEXT: pand %xmm1, %xmm0 282; SSE2-NEXT: paddb %xmm2, %xmm0 283; SSE2-NEXT: movdqa %xmm0, %xmm1 284; SSE2-NEXT: psrlw $4, %xmm1 285; SSE2-NEXT: paddb %xmm0, %xmm1 286; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 287; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 288; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 289; SSE2-NEXT: retq 290; 291; SSE3-LABEL: ult_3_v16i8: 292; SSE3: # %bb.0: 293; SSE3-NEXT: movdqa %xmm0, %xmm1 294; SSE3-NEXT: psrlw $1, %xmm1 295; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 296; SSE3-NEXT: psubb %xmm1, %xmm0 297; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 298; SSE3-NEXT: movdqa %xmm0, %xmm2 299; SSE3-NEXT: pand %xmm1, %xmm2 300; SSE3-NEXT: psrlw $2, %xmm0 301; SSE3-NEXT: pand %xmm1, %xmm0 302; SSE3-NEXT: paddb %xmm2, %xmm0 303; SSE3-NEXT: movdqa %xmm0, %xmm1 304; SSE3-NEXT: psrlw $4, %xmm1 305; SSE3-NEXT: paddb %xmm0, %xmm1 306; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 307; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 308; SSE3-NEXT: pcmpgtb %xmm1, %xmm0 309; SSE3-NEXT: retq 310; 311; SSSE3-LABEL: ult_3_v16i8: 312; SSSE3: # %bb.0: 313; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 314; SSSE3-NEXT: movdqa %xmm0, %xmm2 315; SSSE3-NEXT: pand %xmm1, %xmm2 316; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 317; SSSE3-NEXT: movdqa %xmm3, %xmm4 318; SSSE3-NEXT: pshufb %xmm2, %xmm4 319; SSSE3-NEXT: psrlw $4, %xmm0 320; SSSE3-NEXT: pand %xmm1, %xmm0 321; SSSE3-NEXT: pshufb %xmm0, %xmm3 322; SSSE3-NEXT: paddb %xmm4, %xmm3 323; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 324; SSSE3-NEXT: pminub %xmm3, %xmm0 325; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 326; SSSE3-NEXT: retq 327; 328; SSE41-LABEL: ult_3_v16i8: 329; SSE41: # %bb.0: 330; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 331; SSE41-NEXT: movdqa %xmm0, %xmm2 332; SSE41-NEXT: pand %xmm1, %xmm2 333; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 334; SSE41-NEXT: movdqa %xmm3, %xmm4 335; SSE41-NEXT: pshufb %xmm2, %xmm4 336; SSE41-NEXT: psrlw $4, %xmm0 337; SSE41-NEXT: pand %xmm1, %xmm0 338; SSE41-NEXT: pshufb %xmm0, %xmm3 339; SSE41-NEXT: paddb %xmm4, %xmm3 340; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 341; SSE41-NEXT: pminub %xmm3, %xmm0 342; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 343; SSE41-NEXT: retq 344; 345; AVX1-LABEL: ult_3_v16i8: 346; AVX1: # %bb.0: 347; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 348; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 349; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 350; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 351; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 352; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 353; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 354; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 355; AVX1-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 356; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 357; AVX1-NEXT: retq 358; 359; AVX2-LABEL: ult_3_v16i8: 360; AVX2: # %bb.0: 361; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 362; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 363; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 364; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 365; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 366; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 367; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 368; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 369; AVX2-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 370; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 371; AVX2-NEXT: retq 372; 373; AVX512VPOPCNTDQ-LABEL: ult_3_v16i8: 374; AVX512VPOPCNTDQ: # %bb.0: 375; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 376; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 377; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 378; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 379; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 380; AVX512VPOPCNTDQ-NEXT: vzeroupper 381; AVX512VPOPCNTDQ-NEXT: retq 382; 383; AVX512VPOPCNTDQVL-LABEL: ult_3_v16i8: 384; AVX512VPOPCNTDQVL: # %bb.0: 385; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 386; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 387; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 388; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 389; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 390; AVX512VPOPCNTDQVL-NEXT: vzeroupper 391; AVX512VPOPCNTDQVL-NEXT: retq 392; 393; BITALG_NOVLX-LABEL: ult_3_v16i8: 394; BITALG_NOVLX: # %bb.0: 395; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 396; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 397; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 398; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 399; BITALG_NOVLX-NEXT: vzeroupper 400; BITALG_NOVLX-NEXT: retq 401; 402; BITALG-LABEL: ult_3_v16i8: 403; BITALG: # %bb.0: 404; BITALG-NEXT: vpopcntb %xmm0, %xmm0 405; 
BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 406; BITALG-NEXT: vpmovm2b %k0, %xmm0 407; BITALG-NEXT: retq 408 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 409 %3 = icmp ult <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 410 %4 = sext <16 x i1> %3 to <16 x i8> 411 ret <16 x i8> %4 412} 413 414define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) { 415; SSE2-LABEL: ugt_3_v16i8: 416; SSE2: # %bb.0: 417; SSE2-NEXT: movdqa %xmm0, %xmm1 418; SSE2-NEXT: psrlw $1, %xmm1 419; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 420; SSE2-NEXT: psubb %xmm1, %xmm0 421; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 422; SSE2-NEXT: movdqa %xmm0, %xmm2 423; SSE2-NEXT: pand %xmm1, %xmm2 424; SSE2-NEXT: psrlw $2, %xmm0 425; SSE2-NEXT: pand %xmm1, %xmm0 426; SSE2-NEXT: paddb %xmm2, %xmm0 427; SSE2-NEXT: movdqa %xmm0, %xmm1 428; SSE2-NEXT: psrlw $4, %xmm1 429; SSE2-NEXT: paddb %xmm0, %xmm1 430; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 431; SSE2-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 432; SSE2-NEXT: movdqa %xmm1, %xmm0 433; SSE2-NEXT: retq 434; 435; SSE3-LABEL: ugt_3_v16i8: 436; SSE3: # %bb.0: 437; SSE3-NEXT: movdqa %xmm0, %xmm1 438; SSE3-NEXT: psrlw $1, %xmm1 439; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 440; SSE3-NEXT: psubb %xmm1, %xmm0 441; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 442; SSE3-NEXT: movdqa %xmm0, %xmm2 443; SSE3-NEXT: pand %xmm1, %xmm2 444; SSE3-NEXT: psrlw $2, %xmm0 445; SSE3-NEXT: pand %xmm1, %xmm0 446; SSE3-NEXT: paddb %xmm2, %xmm0 447; SSE3-NEXT: movdqa %xmm0, %xmm1 448; SSE3-NEXT: psrlw $4, %xmm1 449; SSE3-NEXT: paddb %xmm0, %xmm1 450; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 451; SSE3-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 452; SSE3-NEXT: movdqa %xmm1, %xmm0 453; SSE3-NEXT: retq 454; 455; SSSE3-LABEL: ugt_3_v16i8: 456; SSSE3: # %bb.0: 457; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 458; SSSE3-NEXT: movdqa %xmm0, 
%xmm2 459; SSSE3-NEXT: pand %xmm1, %xmm2 460; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 461; SSSE3-NEXT: movdqa %xmm3, %xmm4 462; SSSE3-NEXT: pshufb %xmm2, %xmm4 463; SSSE3-NEXT: psrlw $4, %xmm0 464; SSSE3-NEXT: pand %xmm1, %xmm0 465; SSSE3-NEXT: pshufb %xmm0, %xmm3 466; SSSE3-NEXT: paddb %xmm4, %xmm3 467; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 468; SSSE3-NEXT: pmaxub %xmm3, %xmm0 469; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 470; SSSE3-NEXT: retq 471; 472; SSE41-LABEL: ugt_3_v16i8: 473; SSE41: # %bb.0: 474; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 475; SSE41-NEXT: movdqa %xmm0, %xmm2 476; SSE41-NEXT: pand %xmm1, %xmm2 477; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 478; SSE41-NEXT: movdqa %xmm3, %xmm4 479; SSE41-NEXT: pshufb %xmm2, %xmm4 480; SSE41-NEXT: psrlw $4, %xmm0 481; SSE41-NEXT: pand %xmm1, %xmm0 482; SSE41-NEXT: pshufb %xmm0, %xmm3 483; SSE41-NEXT: paddb %xmm4, %xmm3 484; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 485; SSE41-NEXT: pmaxub %xmm3, %xmm0 486; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 487; SSE41-NEXT: retq 488; 489; AVX1-LABEL: ugt_3_v16i8: 490; AVX1: # %bb.0: 491; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 492; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 493; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 494; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 495; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 496; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 497; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 498; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 499; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 500; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 501; AVX1-NEXT: retq 502; 503; AVX2-LABEL: ugt_3_v16i8: 504; AVX2: # %bb.0: 505; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 506; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 507; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 508; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 509; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 510; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 511; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 512; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 513; AVX2-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 514; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 515; AVX2-NEXT: retq 516; 517; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i8: 518; AVX512VPOPCNTDQ: # %bb.0: 519; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 520; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 521; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 522; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 523; AVX512VPOPCNTDQ-NEXT: vzeroupper 524; AVX512VPOPCNTDQ-NEXT: retq 525; 526; AVX512VPOPCNTDQVL-LABEL: ugt_3_v16i8: 527; AVX512VPOPCNTDQVL: # %bb.0: 528; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 529; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 530; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 531; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 532; AVX512VPOPCNTDQVL-NEXT: vzeroupper 533; AVX512VPOPCNTDQVL-NEXT: retq 534; 535; BITALG_NOVLX-LABEL: ugt_3_v16i8: 536; BITALG_NOVLX: # %bb.0: 537; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 538; 
BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 539; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 540; BITALG_NOVLX-NEXT: vzeroupper 541; BITALG_NOVLX-NEXT: retq 542; 543; BITALG-LABEL: ugt_3_v16i8: 544; BITALG: # %bb.0: 545; BITALG-NEXT: vpopcntb %xmm0, %xmm0 546; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 547; BITALG-NEXT: vpmovm2b %k0, %xmm0 548; BITALG-NEXT: retq 549 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 550 %3 = icmp ugt <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 551 %4 = sext <16 x i1> %3 to <16 x i8> 552 ret <16 x i8> %4 553} 554 555define <16 x i8> @ult_4_v16i8(<16 x i8> %0) { 556; SSE2-LABEL: ult_4_v16i8: 557; SSE2: # %bb.0: 558; SSE2-NEXT: movdqa %xmm0, %xmm1 559; SSE2-NEXT: psrlw $1, %xmm1 560; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 561; SSE2-NEXT: psubb %xmm1, %xmm0 562; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 563; SSE2-NEXT: movdqa %xmm0, %xmm2 564; SSE2-NEXT: pand %xmm1, %xmm2 565; SSE2-NEXT: psrlw $2, %xmm0 566; SSE2-NEXT: pand %xmm1, %xmm0 567; SSE2-NEXT: paddb %xmm2, %xmm0 568; SSE2-NEXT: movdqa %xmm0, %xmm1 569; SSE2-NEXT: psrlw $4, %xmm1 570; SSE2-NEXT: paddb %xmm0, %xmm1 571; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 572; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 573; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 574; SSE2-NEXT: retq 575; 576; SSE3-LABEL: ult_4_v16i8: 577; SSE3: # %bb.0: 578; SSE3-NEXT: movdqa %xmm0, %xmm1 579; SSE3-NEXT: psrlw $1, %xmm1 580; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 581; SSE3-NEXT: psubb %xmm1, %xmm0 582; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 583; SSE3-NEXT: movdqa %xmm0, %xmm2 584; SSE3-NEXT: pand %xmm1, %xmm2 585; SSE3-NEXT: psrlw $2, %xmm0 586; SSE3-NEXT: pand %xmm1, %xmm0 587; SSE3-NEXT: paddb %xmm2, %xmm0 588; SSE3-NEXT: movdqa %xmm0, %xmm1 589; SSE3-NEXT: psrlw $4, %xmm1 590; SSE3-NEXT: paddb %xmm0, %xmm1 591; SSE3-NEXT: pand 
{{.*}}(%rip), %xmm1 592; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 593; SSE3-NEXT: pcmpgtb %xmm1, %xmm0 594; SSE3-NEXT: retq 595; 596; SSSE3-LABEL: ult_4_v16i8: 597; SSSE3: # %bb.0: 598; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 599; SSSE3-NEXT: movdqa %xmm0, %xmm2 600; SSSE3-NEXT: pand %xmm1, %xmm2 601; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 602; SSSE3-NEXT: movdqa %xmm3, %xmm4 603; SSSE3-NEXT: pshufb %xmm2, %xmm4 604; SSSE3-NEXT: psrlw $4, %xmm0 605; SSSE3-NEXT: pand %xmm1, %xmm0 606; SSSE3-NEXT: pshufb %xmm0, %xmm3 607; SSSE3-NEXT: paddb %xmm4, %xmm3 608; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 609; SSSE3-NEXT: pminub %xmm3, %xmm0 610; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 611; SSSE3-NEXT: retq 612; 613; SSE41-LABEL: ult_4_v16i8: 614; SSE41: # %bb.0: 615; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 616; SSE41-NEXT: movdqa %xmm0, %xmm2 617; SSE41-NEXT: pand %xmm1, %xmm2 618; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 619; SSE41-NEXT: movdqa %xmm3, %xmm4 620; SSE41-NEXT: pshufb %xmm2, %xmm4 621; SSE41-NEXT: psrlw $4, %xmm0 622; SSE41-NEXT: pand %xmm1, %xmm0 623; SSE41-NEXT: pshufb %xmm0, %xmm3 624; SSE41-NEXT: paddb %xmm4, %xmm3 625; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 626; SSE41-NEXT: pminub %xmm3, %xmm0 627; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 628; SSE41-NEXT: retq 629; 630; AVX1-LABEL: ult_4_v16i8: 631; AVX1: # %bb.0: 632; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 633; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 634; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 635; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 636; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 637; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 638; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 639; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 640; AVX1-NEXT: 
vpminub {{.*}}(%rip), %xmm0, %xmm1 641; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 642; AVX1-NEXT: retq 643; 644; AVX2-LABEL: ult_4_v16i8: 645; AVX2: # %bb.0: 646; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 647; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 648; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 649; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 650; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 651; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 652; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 653; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 654; AVX2-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 655; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 656; AVX2-NEXT: retq 657; 658; AVX512VPOPCNTDQ-LABEL: ult_4_v16i8: 659; AVX512VPOPCNTDQ: # %bb.0: 660; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 661; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 662; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 663; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 664; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 665; AVX512VPOPCNTDQ-NEXT: vzeroupper 666; AVX512VPOPCNTDQ-NEXT: retq 667; 668; AVX512VPOPCNTDQVL-LABEL: ult_4_v16i8: 669; AVX512VPOPCNTDQVL: # %bb.0: 670; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 671; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 672; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 673; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 674; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 675; AVX512VPOPCNTDQVL-NEXT: vzeroupper 676; AVX512VPOPCNTDQVL-NEXT: retq 677; 678; BITALG_NOVLX-LABEL: ult_4_v16i8: 679; BITALG_NOVLX: # %bb.0: 680; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 681; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 682; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 683; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 684; BITALG_NOVLX-NEXT: vzeroupper 685; BITALG_NOVLX-NEXT: retq 686; 687; BITALG-LABEL: ult_4_v16i8: 688; BITALG: # %bb.0: 689; BITALG-NEXT: vpopcntb %xmm0, %xmm0 690; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 691; BITALG-NEXT: vpmovm2b %k0, %xmm0 692; BITALG-NEXT: retq 693 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 694 %3 = icmp ult <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> 695 %4 = sext <16 x i1> %3 to <16 x i8> 696 ret <16 x i8> %4 697} 698 699define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) { 700; SSE2-LABEL: ugt_4_v16i8: 701; SSE2: # %bb.0: 702; SSE2-NEXT: movdqa %xmm0, %xmm1 703; SSE2-NEXT: psrlw $1, %xmm1 704; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 705; SSE2-NEXT: psubb %xmm1, %xmm0 706; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 707; SSE2-NEXT: movdqa %xmm0, %xmm2 708; SSE2-NEXT: pand %xmm1, %xmm2 709; SSE2-NEXT: psrlw $2, %xmm0 710; SSE2-NEXT: 
pand %xmm1, %xmm0 711; SSE2-NEXT: paddb %xmm2, %xmm0 712; SSE2-NEXT: movdqa %xmm0, %xmm1 713; SSE2-NEXT: psrlw $4, %xmm1 714; SSE2-NEXT: paddb %xmm0, %xmm1 715; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 716; SSE2-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 717; SSE2-NEXT: movdqa %xmm1, %xmm0 718; SSE2-NEXT: retq 719; 720; SSE3-LABEL: ugt_4_v16i8: 721; SSE3: # %bb.0: 722; SSE3-NEXT: movdqa %xmm0, %xmm1 723; SSE3-NEXT: psrlw $1, %xmm1 724; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 725; SSE3-NEXT: psubb %xmm1, %xmm0 726; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 727; SSE3-NEXT: movdqa %xmm0, %xmm2 728; SSE3-NEXT: pand %xmm1, %xmm2 729; SSE3-NEXT: psrlw $2, %xmm0 730; SSE3-NEXT: pand %xmm1, %xmm0 731; SSE3-NEXT: paddb %xmm2, %xmm0 732; SSE3-NEXT: movdqa %xmm0, %xmm1 733; SSE3-NEXT: psrlw $4, %xmm1 734; SSE3-NEXT: paddb %xmm0, %xmm1 735; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 736; SSE3-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 737; SSE3-NEXT: movdqa %xmm1, %xmm0 738; SSE3-NEXT: retq 739; 740; SSSE3-LABEL: ugt_4_v16i8: 741; SSSE3: # %bb.0: 742; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 743; SSSE3-NEXT: movdqa %xmm0, %xmm2 744; SSSE3-NEXT: pand %xmm1, %xmm2 745; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 746; SSSE3-NEXT: movdqa %xmm3, %xmm4 747; SSSE3-NEXT: pshufb %xmm2, %xmm4 748; SSSE3-NEXT: psrlw $4, %xmm0 749; SSSE3-NEXT: pand %xmm1, %xmm0 750; SSSE3-NEXT: pshufb %xmm0, %xmm3 751; SSSE3-NEXT: paddb %xmm4, %xmm3 752; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 753; SSSE3-NEXT: pmaxub %xmm3, %xmm0 754; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 755; SSSE3-NEXT: retq 756; 757; SSE41-LABEL: ugt_4_v16i8: 758; SSE41: # %bb.0: 759; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 760; SSE41-NEXT: movdqa %xmm0, %xmm2 761; SSE41-NEXT: pand %xmm1, %xmm2 762; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 763; SSE41-NEXT: movdqa 
%xmm3, %xmm4 764; SSE41-NEXT: pshufb %xmm2, %xmm4 765; SSE41-NEXT: psrlw $4, %xmm0 766; SSE41-NEXT: pand %xmm1, %xmm0 767; SSE41-NEXT: pshufb %xmm0, %xmm3 768; SSE41-NEXT: paddb %xmm4, %xmm3 769; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 770; SSE41-NEXT: pmaxub %xmm3, %xmm0 771; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 772; SSE41-NEXT: retq 773; 774; AVX1-LABEL: ugt_4_v16i8: 775; AVX1: # %bb.0: 776; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 777; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 778; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 779; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 780; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 781; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 782; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 783; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 784; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 785; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 786; AVX1-NEXT: retq 787; 788; AVX2-LABEL: ugt_4_v16i8: 789; AVX2: # %bb.0: 790; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 791; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 792; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 793; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 794; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 795; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 796; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 797; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 798; AVX2-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 799; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 800; AVX2-NEXT: retq 801; 802; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i8: 803; AVX512VPOPCNTDQ: # %bb.0: 804; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 805; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 806; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 807; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 808; AVX512VPOPCNTDQ-NEXT: vzeroupper 809; AVX512VPOPCNTDQ-NEXT: retq 810; 811; AVX512VPOPCNTDQVL-LABEL: ugt_4_v16i8: 812; AVX512VPOPCNTDQVL: # %bb.0: 813; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 814; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 815; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 816; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 817; AVX512VPOPCNTDQVL-NEXT: vzeroupper 818; AVX512VPOPCNTDQVL-NEXT: retq 819; 820; BITALG_NOVLX-LABEL: ugt_4_v16i8: 821; BITALG_NOVLX: # %bb.0: 822; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 823; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 824; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 825; BITALG_NOVLX-NEXT: vzeroupper 826; BITALG_NOVLX-NEXT: retq 827; 828; BITALG-LABEL: ugt_4_v16i8: 829; BITALG: # %bb.0: 830; BITALG-NEXT: vpopcntb %xmm0, %xmm0 831; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 832; BITALG-NEXT: vpmovm2b %k0, %xmm0 833; BITALG-NEXT: retq 834 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 835 %3 = icmp ugt <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, 
i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> 836 %4 = sext <16 x i1> %3 to <16 x i8> 837 ret <16 x i8> %4 838} 839 840define <16 x i8> @ult_5_v16i8(<16 x i8> %0) { 841; SSE2-LABEL: ult_5_v16i8: 842; SSE2: # %bb.0: 843; SSE2-NEXT: movdqa %xmm0, %xmm1 844; SSE2-NEXT: psrlw $1, %xmm1 845; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 846; SSE2-NEXT: psubb %xmm1, %xmm0 847; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 848; SSE2-NEXT: movdqa %xmm0, %xmm2 849; SSE2-NEXT: pand %xmm1, %xmm2 850; SSE2-NEXT: psrlw $2, %xmm0 851; SSE2-NEXT: pand %xmm1, %xmm0 852; SSE2-NEXT: paddb %xmm2, %xmm0 853; SSE2-NEXT: movdqa %xmm0, %xmm1 854; SSE2-NEXT: psrlw $4, %xmm1 855; SSE2-NEXT: paddb %xmm0, %xmm1 856; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 857; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 858; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 859; SSE2-NEXT: retq 860; 861; SSE3-LABEL: ult_5_v16i8: 862; SSE3: # %bb.0: 863; SSE3-NEXT: movdqa %xmm0, %xmm1 864; SSE3-NEXT: psrlw $1, %xmm1 865; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 866; SSE3-NEXT: psubb %xmm1, %xmm0 867; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 868; SSE3-NEXT: movdqa %xmm0, %xmm2 869; SSE3-NEXT: pand %xmm1, %xmm2 870; SSE3-NEXT: psrlw $2, %xmm0 871; SSE3-NEXT: pand %xmm1, %xmm0 872; SSE3-NEXT: paddb %xmm2, %xmm0 873; SSE3-NEXT: movdqa %xmm0, %xmm1 874; SSE3-NEXT: psrlw $4, %xmm1 875; SSE3-NEXT: paddb %xmm0, %xmm1 876; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 877; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 878; SSE3-NEXT: pcmpgtb %xmm1, %xmm0 879; SSE3-NEXT: retq 880; 881; SSSE3-LABEL: ult_5_v16i8: 882; SSSE3: # %bb.0: 883; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 884; SSSE3-NEXT: movdqa %xmm0, %xmm2 885; SSSE3-NEXT: pand %xmm1, %xmm2 886; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 887; SSSE3-NEXT: movdqa %xmm3, %xmm4 888; SSSE3-NEXT: 
pshufb %xmm2, %xmm4 889; SSSE3-NEXT: psrlw $4, %xmm0 890; SSSE3-NEXT: pand %xmm1, %xmm0 891; SSSE3-NEXT: pshufb %xmm0, %xmm3 892; SSSE3-NEXT: paddb %xmm4, %xmm3 893; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 894; SSSE3-NEXT: pminub %xmm3, %xmm0 895; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 896; SSSE3-NEXT: retq 897; 898; SSE41-LABEL: ult_5_v16i8: 899; SSE41: # %bb.0: 900; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 901; SSE41-NEXT: movdqa %xmm0, %xmm2 902; SSE41-NEXT: pand %xmm1, %xmm2 903; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 904; SSE41-NEXT: movdqa %xmm3, %xmm4 905; SSE41-NEXT: pshufb %xmm2, %xmm4 906; SSE41-NEXT: psrlw $4, %xmm0 907; SSE41-NEXT: pand %xmm1, %xmm0 908; SSE41-NEXT: pshufb %xmm0, %xmm3 909; SSE41-NEXT: paddb %xmm4, %xmm3 910; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 911; SSE41-NEXT: pminub %xmm3, %xmm0 912; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 913; SSE41-NEXT: retq 914; 915; AVX1-LABEL: ult_5_v16i8: 916; AVX1: # %bb.0: 917; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 918; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 919; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 920; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 921; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 922; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 923; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 924; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 925; AVX1-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 926; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 927; AVX1-NEXT: retq 928; 929; AVX2-LABEL: ult_5_v16i8: 930; AVX2: # %bb.0: 931; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 932; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 933; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 934; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 935; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 936; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 937; 
AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 938; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 939; AVX2-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 940; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 941; AVX2-NEXT: retq 942; 943; AVX512VPOPCNTDQ-LABEL: ult_5_v16i8: 944; AVX512VPOPCNTDQ: # %bb.0: 945; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 946; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 947; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 948; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 949; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 950; AVX512VPOPCNTDQ-NEXT: vzeroupper 951; AVX512VPOPCNTDQ-NEXT: retq 952; 953; AVX512VPOPCNTDQVL-LABEL: ult_5_v16i8: 954; AVX512VPOPCNTDQVL: # %bb.0: 955; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 956; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 957; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 958; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 959; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 960; AVX512VPOPCNTDQVL-NEXT: vzeroupper 961; AVX512VPOPCNTDQVL-NEXT: retq 962; 963; BITALG_NOVLX-LABEL: ult_5_v16i8: 964; BITALG_NOVLX: # %bb.0: 965; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 966; 
BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 967; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 968; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 969; BITALG_NOVLX-NEXT: vzeroupper 970; BITALG_NOVLX-NEXT: retq 971; 972; BITALG-LABEL: ult_5_v16i8: 973; BITALG: # %bb.0: 974; BITALG-NEXT: vpopcntb %xmm0, %xmm0 975; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 976; BITALG-NEXT: vpmovm2b %k0, %xmm0 977; BITALG-NEXT: retq 978 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 979 %3 = icmp ult <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> 980 %4 = sext <16 x i1> %3 to <16 x i8> 981 ret <16 x i8> %4 982} 983 984define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) { 985; SSE2-LABEL: ugt_5_v16i8: 986; SSE2: # %bb.0: 987; SSE2-NEXT: movdqa %xmm0, %xmm1 988; SSE2-NEXT: psrlw $1, %xmm1 989; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 990; SSE2-NEXT: psubb %xmm1, %xmm0 991; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 992; SSE2-NEXT: movdqa %xmm0, %xmm2 993; SSE2-NEXT: pand %xmm1, %xmm2 994; SSE2-NEXT: psrlw $2, %xmm0 995; SSE2-NEXT: pand %xmm1, %xmm0 996; SSE2-NEXT: paddb %xmm2, %xmm0 997; SSE2-NEXT: movdqa %xmm0, %xmm1 998; SSE2-NEXT: psrlw $4, %xmm1 999; SSE2-NEXT: paddb %xmm0, %xmm1 1000; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1001; SSE2-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 1002; SSE2-NEXT: movdqa %xmm1, %xmm0 1003; SSE2-NEXT: retq 1004; 1005; SSE3-LABEL: ugt_5_v16i8: 1006; SSE3: # %bb.0: 1007; SSE3-NEXT: movdqa %xmm0, %xmm1 1008; SSE3-NEXT: psrlw $1, %xmm1 1009; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1010; SSE3-NEXT: psubb %xmm1, %xmm0 1011; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1012; SSE3-NEXT: movdqa %xmm0, %xmm2 1013; SSE3-NEXT: pand %xmm1, %xmm2 1014; SSE3-NEXT: psrlw $2, %xmm0 1015; SSE3-NEXT: pand %xmm1, %xmm0 1016; SSE3-NEXT: paddb %xmm2, %xmm0 1017; SSE3-NEXT: movdqa %xmm0, %xmm1 1018; SSE3-NEXT: psrlw 
$4, %xmm1 1019; SSE3-NEXT: paddb %xmm0, %xmm1 1020; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1021; SSE3-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 1022; SSE3-NEXT: movdqa %xmm1, %xmm0 1023; SSE3-NEXT: retq 1024; 1025; SSSE3-LABEL: ugt_5_v16i8: 1026; SSSE3: # %bb.0: 1027; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1028; SSSE3-NEXT: movdqa %xmm0, %xmm2 1029; SSSE3-NEXT: pand %xmm1, %xmm2 1030; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1031; SSSE3-NEXT: movdqa %xmm3, %xmm4 1032; SSSE3-NEXT: pshufb %xmm2, %xmm4 1033; SSSE3-NEXT: psrlw $4, %xmm0 1034; SSSE3-NEXT: pand %xmm1, %xmm0 1035; SSSE3-NEXT: pshufb %xmm0, %xmm3 1036; SSSE3-NEXT: paddb %xmm4, %xmm3 1037; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1038; SSSE3-NEXT: pmaxub %xmm3, %xmm0 1039; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 1040; SSSE3-NEXT: retq 1041; 1042; SSE41-LABEL: ugt_5_v16i8: 1043; SSE41: # %bb.0: 1044; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1045; SSE41-NEXT: movdqa %xmm0, %xmm2 1046; SSE41-NEXT: pand %xmm1, %xmm2 1047; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1048; SSE41-NEXT: movdqa %xmm3, %xmm4 1049; SSE41-NEXT: pshufb %xmm2, %xmm4 1050; SSE41-NEXT: psrlw $4, %xmm0 1051; SSE41-NEXT: pand %xmm1, %xmm0 1052; SSE41-NEXT: pshufb %xmm0, %xmm3 1053; SSE41-NEXT: paddb %xmm4, %xmm3 1054; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1055; SSE41-NEXT: pmaxub %xmm3, %xmm0 1056; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 1057; SSE41-NEXT: retq 1058; 1059; AVX1-LABEL: ugt_5_v16i8: 1060; AVX1: # %bb.0: 1061; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1062; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 1063; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1064; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1065; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 1066; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1067; AVX1-NEXT: 
vpshufb %xmm0, %xmm3, %xmm0 1068; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1069; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 1070; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1071; AVX1-NEXT: retq 1072; 1073; AVX2-LABEL: ugt_5_v16i8: 1074; AVX2: # %bb.0: 1075; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1076; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 1077; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1078; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1079; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 1080; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1081; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1082; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1083; AVX2-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 1084; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1085; AVX2-NEXT: retq 1086; 1087; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i8: 1088; AVX512VPOPCNTDQ: # %bb.0: 1089; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1090; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 1091; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 1092; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 1093; AVX512VPOPCNTDQ-NEXT: vzeroupper 1094; AVX512VPOPCNTDQ-NEXT: retq 1095; 1096; AVX512VPOPCNTDQVL-LABEL: ugt_5_v16i8: 1097; AVX512VPOPCNTDQVL: # %bb.0: 1098; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1099; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 1100; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 1101; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 1102; AVX512VPOPCNTDQVL-NEXT: vzeroupper 1103; AVX512VPOPCNTDQVL-NEXT: retq 1104; 1105; BITALG_NOVLX-LABEL: ugt_5_v16i8: 1106; BITALG_NOVLX: # %bb.0: 1107; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1108; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 1109; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 1110; BITALG_NOVLX-NEXT: vzeroupper 1111; BITALG_NOVLX-NEXT: retq 1112; 1113; BITALG-LABEL: ugt_5_v16i8: 1114; BITALG: # %bb.0: 1115; BITALG-NEXT: vpopcntb %xmm0, %xmm0 1116; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 1117; BITALG-NEXT: vpmovm2b %k0, %xmm0 1118; BITALG-NEXT: retq 1119 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 1120 %3 = icmp ugt <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> 1121 %4 = sext <16 x i1> %3 to <16 x i8> 1122 ret <16 x i8> %4 1123} 1124 1125define <16 x i8> @ult_6_v16i8(<16 x i8> %0) { 1126; SSE2-LABEL: ult_6_v16i8: 1127; SSE2: # %bb.0: 1128; SSE2-NEXT: movdqa %xmm0, %xmm1 1129; SSE2-NEXT: psrlw $1, %xmm1 1130; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1131; SSE2-NEXT: psubb %xmm1, %xmm0 1132; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1133; SSE2-NEXT: movdqa %xmm0, %xmm2 1134; SSE2-NEXT: pand %xmm1, %xmm2 1135; SSE2-NEXT: psrlw $2, %xmm0 1136; SSE2-NEXT: pand %xmm1, %xmm0 1137; SSE2-NEXT: paddb %xmm2, %xmm0 1138; SSE2-NEXT: movdqa %xmm0, %xmm1 1139; SSE2-NEXT: psrlw $4, 
%xmm1 1140; SSE2-NEXT: paddb %xmm0, %xmm1 1141; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1142; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1143; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 1144; SSE2-NEXT: retq 1145; 1146; SSE3-LABEL: ult_6_v16i8: 1147; SSE3: # %bb.0: 1148; SSE3-NEXT: movdqa %xmm0, %xmm1 1149; SSE3-NEXT: psrlw $1, %xmm1 1150; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1151; SSE3-NEXT: psubb %xmm1, %xmm0 1152; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1153; SSE3-NEXT: movdqa %xmm0, %xmm2 1154; SSE3-NEXT: pand %xmm1, %xmm2 1155; SSE3-NEXT: psrlw $2, %xmm0 1156; SSE3-NEXT: pand %xmm1, %xmm0 1157; SSE3-NEXT: paddb %xmm2, %xmm0 1158; SSE3-NEXT: movdqa %xmm0, %xmm1 1159; SSE3-NEXT: psrlw $4, %xmm1 1160; SSE3-NEXT: paddb %xmm0, %xmm1 1161; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1162; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1163; SSE3-NEXT: pcmpgtb %xmm1, %xmm0 1164; SSE3-NEXT: retq 1165; 1166; SSSE3-LABEL: ult_6_v16i8: 1167; SSSE3: # %bb.0: 1168; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1169; SSSE3-NEXT: movdqa %xmm0, %xmm2 1170; SSSE3-NEXT: pand %xmm1, %xmm2 1171; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1172; SSSE3-NEXT: movdqa %xmm3, %xmm4 1173; SSSE3-NEXT: pshufb %xmm2, %xmm4 1174; SSSE3-NEXT: psrlw $4, %xmm0 1175; SSSE3-NEXT: pand %xmm1, %xmm0 1176; SSSE3-NEXT: pshufb %xmm0, %xmm3 1177; SSSE3-NEXT: paddb %xmm4, %xmm3 1178; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 1179; SSSE3-NEXT: pminub %xmm3, %xmm0 1180; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 1181; SSSE3-NEXT: retq 1182; 1183; SSE41-LABEL: ult_6_v16i8: 1184; SSE41: # %bb.0: 1185; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1186; SSE41-NEXT: movdqa %xmm0, %xmm2 1187; SSE41-NEXT: pand %xmm1, %xmm2 1188; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1189; SSE41-NEXT: movdqa %xmm3, 
%xmm4 1190; SSE41-NEXT: pshufb %xmm2, %xmm4 1191; SSE41-NEXT: psrlw $4, %xmm0 1192; SSE41-NEXT: pand %xmm1, %xmm0 1193; SSE41-NEXT: pshufb %xmm0, %xmm3 1194; SSE41-NEXT: paddb %xmm4, %xmm3 1195; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] 1196; SSE41-NEXT: pminub %xmm3, %xmm0 1197; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 1198; SSE41-NEXT: retq 1199; 1200; AVX1-LABEL: ult_6_v16i8: 1201; AVX1: # %bb.0: 1202; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1203; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 1204; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1205; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1206; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 1207; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1208; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1209; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1210; AVX1-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 1211; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1212; AVX1-NEXT: retq 1213; 1214; AVX2-LABEL: ult_6_v16i8: 1215; AVX2: # %bb.0: 1216; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1217; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 1218; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1219; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1220; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 1221; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1222; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1223; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1224; AVX2-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 1225; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1226; AVX2-NEXT: retq 1227; 1228; AVX512VPOPCNTDQ-LABEL: ult_6_v16i8: 1229; AVX512VPOPCNTDQ: # %bb.0: 1230; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1231; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 1232; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 1233; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1234; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1235; AVX512VPOPCNTDQ-NEXT: vzeroupper 1236; AVX512VPOPCNTDQ-NEXT: retq 1237; 1238; AVX512VPOPCNTDQVL-LABEL: ult_6_v16i8: 1239; AVX512VPOPCNTDQVL: # %bb.0: 1240; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1241; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 1242; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 1243; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1244; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1245; AVX512VPOPCNTDQVL-NEXT: vzeroupper 1246; AVX512VPOPCNTDQVL-NEXT: retq 1247; 1248; BITALG_NOVLX-LABEL: ult_6_v16i8: 1249; BITALG_NOVLX: # %bb.0: 1250; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1251; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 1252; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1253; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1254; BITALG_NOVLX-NEXT: vzeroupper 1255; BITALG_NOVLX-NEXT: retq 1256; 1257; BITALG-LABEL: ult_6_v16i8: 1258; BITALG: # %bb.0: 1259; BITALG-NEXT: 
vpopcntb %xmm0, %xmm0 1260; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 1261; BITALG-NEXT: vpmovm2b %k0, %xmm0 1262; BITALG-NEXT: retq 1263 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 1264 %3 = icmp ult <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6> 1265 %4 = sext <16 x i1> %3 to <16 x i8> 1266 ret <16 x i8> %4 1267} 1268 1269define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) { 1270; SSE2-LABEL: ugt_6_v16i8: 1271; SSE2: # %bb.0: 1272; SSE2-NEXT: movdqa %xmm0, %xmm1 1273; SSE2-NEXT: psrlw $1, %xmm1 1274; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1275; SSE2-NEXT: psubb %xmm1, %xmm0 1276; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1277; SSE2-NEXT: movdqa %xmm0, %xmm2 1278; SSE2-NEXT: pand %xmm1, %xmm2 1279; SSE2-NEXT: psrlw $2, %xmm0 1280; SSE2-NEXT: pand %xmm1, %xmm0 1281; SSE2-NEXT: paddb %xmm2, %xmm0 1282; SSE2-NEXT: movdqa %xmm0, %xmm1 1283; SSE2-NEXT: psrlw $4, %xmm1 1284; SSE2-NEXT: paddb %xmm0, %xmm1 1285; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1286; SSE2-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 1287; SSE2-NEXT: movdqa %xmm1, %xmm0 1288; SSE2-NEXT: retq 1289; 1290; SSE3-LABEL: ugt_6_v16i8: 1291; SSE3: # %bb.0: 1292; SSE3-NEXT: movdqa %xmm0, %xmm1 1293; SSE3-NEXT: psrlw $1, %xmm1 1294; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1295; SSE3-NEXT: psubb %xmm1, %xmm0 1296; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1297; SSE3-NEXT: movdqa %xmm0, %xmm2 1298; SSE3-NEXT: pand %xmm1, %xmm2 1299; SSE3-NEXT: psrlw $2, %xmm0 1300; SSE3-NEXT: pand %xmm1, %xmm0 1301; SSE3-NEXT: paddb %xmm2, %xmm0 1302; SSE3-NEXT: movdqa %xmm0, %xmm1 1303; SSE3-NEXT: psrlw $4, %xmm1 1304; SSE3-NEXT: paddb %xmm0, %xmm1 1305; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1306; SSE3-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 1307; SSE3-NEXT: movdqa %xmm1, %xmm0 1308; SSE3-NEXT: retq 1309; 1310; SSSE3-LABEL: ugt_6_v16i8: 1311; SSSE3: # %bb.0: 1312; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1313; SSSE3-NEXT: movdqa %xmm0, %xmm2 1314; SSSE3-NEXT: pand %xmm1, %xmm2 1315; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1316; SSSE3-NEXT: movdqa %xmm3, %xmm4 1317; SSSE3-NEXT: pshufb %xmm2, %xmm4 1318; SSSE3-NEXT: psrlw $4, %xmm0 1319; SSSE3-NEXT: pand %xmm1, %xmm0 1320; SSSE3-NEXT: pshufb %xmm0, %xmm3 1321; SSSE3-NEXT: paddb %xmm4, %xmm3 1322; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1323; SSSE3-NEXT: pmaxub %xmm3, %xmm0 1324; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 1325; SSSE3-NEXT: retq 1326; 1327; SSE41-LABEL: ugt_6_v16i8: 1328; SSE41: # %bb.0: 1329; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1330; SSE41-NEXT: movdqa %xmm0, %xmm2 1331; SSE41-NEXT: pand %xmm1, %xmm2 1332; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1333; SSE41-NEXT: movdqa %xmm3, %xmm4 1334; SSE41-NEXT: pshufb %xmm2, %xmm4 1335; SSE41-NEXT: psrlw $4, %xmm0 1336; SSE41-NEXT: pand %xmm1, %xmm0 1337; SSE41-NEXT: pshufb %xmm0, %xmm3 1338; SSE41-NEXT: paddb %xmm4, %xmm3 1339; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1340; SSE41-NEXT: pmaxub %xmm3, %xmm0 1341; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 1342; SSE41-NEXT: retq 1343; 1344; AVX1-LABEL: ugt_6_v16i8: 1345; AVX1: # %bb.0: 1346; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1347; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 1348; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1349; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1350; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 1351; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1352; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1353; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1354; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 1355; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1356; AVX1-NEXT: retq 1357; 1358; AVX2-LABEL: ugt_6_v16i8: 1359; AVX2: # %bb.0: 1360; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1361; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 1362; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1363; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1364; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 1365; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1366; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1367; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1368; AVX2-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 1369; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1370; AVX2-NEXT: retq 1371; 1372; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i8: 1373; AVX512VPOPCNTDQ: # %bb.0: 1374; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1375; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 1376; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 1377; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 1378; AVX512VPOPCNTDQ-NEXT: vzeroupper 1379; AVX512VPOPCNTDQ-NEXT: retq 1380; 1381; AVX512VPOPCNTDQVL-LABEL: ugt_6_v16i8: 1382; AVX512VPOPCNTDQVL: # %bb.0: 1383; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1384; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 1385; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 1386; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 1387; AVX512VPOPCNTDQVL-NEXT: vzeroupper 1388; 
AVX512VPOPCNTDQVL-NEXT: retq 1389; 1390; BITALG_NOVLX-LABEL: ugt_6_v16i8: 1391; BITALG_NOVLX: # %bb.0: 1392; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1393; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 1394; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 1395; BITALG_NOVLX-NEXT: vzeroupper 1396; BITALG_NOVLX-NEXT: retq 1397; 1398; BITALG-LABEL: ugt_6_v16i8: 1399; BITALG: # %bb.0: 1400; BITALG-NEXT: vpopcntb %xmm0, %xmm0 1401; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 1402; BITALG-NEXT: vpmovm2b %k0, %xmm0 1403; BITALG-NEXT: retq 1404 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 1405 %3 = icmp ugt <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6> 1406 %4 = sext <16 x i1> %3 to <16 x i8> 1407 ret <16 x i8> %4 1408} 1409 1410define <16 x i8> @ult_7_v16i8(<16 x i8> %0) { 1411; SSE2-LABEL: ult_7_v16i8: 1412; SSE2: # %bb.0: 1413; SSE2-NEXT: movdqa %xmm0, %xmm1 1414; SSE2-NEXT: psrlw $1, %xmm1 1415; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1416; SSE2-NEXT: psubb %xmm1, %xmm0 1417; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1418; SSE2-NEXT: movdqa %xmm0, %xmm2 1419; SSE2-NEXT: pand %xmm1, %xmm2 1420; SSE2-NEXT: psrlw $2, %xmm0 1421; SSE2-NEXT: pand %xmm1, %xmm0 1422; SSE2-NEXT: paddb %xmm2, %xmm0 1423; SSE2-NEXT: movdqa %xmm0, %xmm1 1424; SSE2-NEXT: psrlw $4, %xmm1 1425; SSE2-NEXT: paddb %xmm0, %xmm1 1426; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1427; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1428; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 1429; SSE2-NEXT: retq 1430; 1431; SSE3-LABEL: ult_7_v16i8: 1432; SSE3: # %bb.0: 1433; SSE3-NEXT: movdqa %xmm0, %xmm1 1434; SSE3-NEXT: psrlw $1, %xmm1 1435; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1436; SSE3-NEXT: psubb %xmm1, %xmm0 1437; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1438; SSE3-NEXT: movdqa %xmm0, %xmm2 1439; SSE3-NEXT: pand %xmm1, %xmm2 
1440; SSE3-NEXT: psrlw $2, %xmm0 1441; SSE3-NEXT: pand %xmm1, %xmm0 1442; SSE3-NEXT: paddb %xmm2, %xmm0 1443; SSE3-NEXT: movdqa %xmm0, %xmm1 1444; SSE3-NEXT: psrlw $4, %xmm1 1445; SSE3-NEXT: paddb %xmm0, %xmm1 1446; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1447; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1448; SSE3-NEXT: pcmpgtb %xmm1, %xmm0 1449; SSE3-NEXT: retq 1450; 1451; SSSE3-LABEL: ult_7_v16i8: 1452; SSSE3: # %bb.0: 1453; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1454; SSSE3-NEXT: movdqa %xmm0, %xmm2 1455; SSSE3-NEXT: pand %xmm1, %xmm2 1456; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1457; SSSE3-NEXT: movdqa %xmm3, %xmm4 1458; SSSE3-NEXT: pshufb %xmm2, %xmm4 1459; SSSE3-NEXT: psrlw $4, %xmm0 1460; SSSE3-NEXT: pand %xmm1, %xmm0 1461; SSSE3-NEXT: pshufb %xmm0, %xmm3 1462; SSSE3-NEXT: paddb %xmm4, %xmm3 1463; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1464; SSSE3-NEXT: pminub %xmm3, %xmm0 1465; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 1466; SSSE3-NEXT: retq 1467; 1468; SSE41-LABEL: ult_7_v16i8: 1469; SSE41: # %bb.0: 1470; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1471; SSE41-NEXT: movdqa %xmm0, %xmm2 1472; SSE41-NEXT: pand %xmm1, %xmm2 1473; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1474; SSE41-NEXT: movdqa %xmm3, %xmm4 1475; SSE41-NEXT: pshufb %xmm2, %xmm4 1476; SSE41-NEXT: psrlw $4, %xmm0 1477; SSE41-NEXT: pand %xmm1, %xmm0 1478; SSE41-NEXT: pshufb %xmm0, %xmm3 1479; SSE41-NEXT: paddb %xmm4, %xmm3 1480; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 1481; SSE41-NEXT: pminub %xmm3, %xmm0 1482; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 1483; SSE41-NEXT: retq 1484; 1485; AVX1-LABEL: ult_7_v16i8: 1486; AVX1: # %bb.0: 1487; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1488; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 1489; AVX1-NEXT: vmovdqa 
{{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1490; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1491; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 1492; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1493; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1494; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1495; AVX1-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 1496; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1497; AVX1-NEXT: retq 1498; 1499; AVX2-LABEL: ult_7_v16i8: 1500; AVX2: # %bb.0: 1501; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1502; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 1503; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1504; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1505; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 1506; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1507; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1508; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1509; AVX2-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 1510; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1511; AVX2-NEXT: retq 1512; 1513; AVX512VPOPCNTDQ-LABEL: ult_7_v16i8: 1514; AVX512VPOPCNTDQ: # %bb.0: 1515; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1516; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 1517; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 1518; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1519; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1520; AVX512VPOPCNTDQ-NEXT: vzeroupper 1521; AVX512VPOPCNTDQ-NEXT: retq 1522; 1523; AVX512VPOPCNTDQVL-LABEL: ult_7_v16i8: 1524; AVX512VPOPCNTDQVL: # %bb.0: 1525; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1526; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 1527; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 1528; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1529; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1530; AVX512VPOPCNTDQVL-NEXT: vzeroupper 1531; AVX512VPOPCNTDQVL-NEXT: retq 1532; 1533; BITALG_NOVLX-LABEL: ult_7_v16i8: 1534; BITALG_NOVLX: # %bb.0: 1535; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1536; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 1537; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1538; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1539; BITALG_NOVLX-NEXT: vzeroupper 1540; BITALG_NOVLX-NEXT: retq 1541; 1542; BITALG-LABEL: ult_7_v16i8: 1543; BITALG: # %bb.0: 1544; BITALG-NEXT: vpopcntb %xmm0, %xmm0 1545; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 1546; BITALG-NEXT: vpmovm2b %k0, %xmm0 1547; BITALG-NEXT: retq 1548 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) 1549 %3 = icmp ult <16 x i8> %2, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> 1550 %4 = sext <16 x i1> %3 to <16 x i8> 1551 ret <16 x i8> %4 1552} 1553 1554define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) { 1555; SSE-LABEL: ugt_1_v8i16: 1556; SSE: # %bb.0: 1557; SSE-NEXT: pcmpeqd %xmm2, %xmm2 1558; SSE-NEXT: movdqa %xmm0, %xmm1 1559; SSE-NEXT: paddw %xmm2, %xmm1 1560; SSE-NEXT: pand %xmm0, %xmm1 1561; SSE-NEXT: pxor %xmm0, %xmm0 1562; SSE-NEXT: pcmpeqw %xmm0, %xmm1 1563; SSE-NEXT: pxor %xmm2, %xmm1 1564; SSE-NEXT: movdqa %xmm1, %xmm0 1565; SSE-NEXT: retq 1566; 1567; 
AVX1-LABEL: ugt_1_v8i16: 1568; AVX1: # %bb.0: 1569; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1570; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2 1571; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1572; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1573; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 1574; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1575; AVX1-NEXT: retq 1576; 1577; AVX2-LABEL: ugt_1_v8i16: 1578; AVX2: # %bb.0: 1579; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1580; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm2 1581; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 1582; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1583; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 1584; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1585; AVX2-NEXT: retq 1586; 1587; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i16: 1588; AVX512VPOPCNTDQ: # %bb.0: 1589; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1590; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm1, %xmm0, %xmm1 1591; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 1592; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 1593; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1594; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 1595; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1596; AVX512VPOPCNTDQ-NEXT: vzeroupper 1597; AVX512VPOPCNTDQ-NEXT: retq 1598; 1599; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i16: 1600; AVX512VPOPCNTDQVL: # %bb.0: 1601; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1602; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm1 1603; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0 1604; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1605; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1606; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 1607; AVX512VPOPCNTDQVL-NEXT: retq 1608; 1609; BITALG_NOVLX-LABEL: ugt_1_v8i16: 1610; BITALG_NOVLX: # %bb.0: 1611; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1612; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 1613; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 1614; BITALG_NOVLX-NEXT: vzeroupper 
1615; BITALG_NOVLX-NEXT: retq 1616; 1617; BITALG-LABEL: ugt_1_v8i16: 1618; BITALG: # %bb.0: 1619; BITALG-NEXT: vpopcntw %xmm0, %xmm0 1620; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 1621; BITALG-NEXT: vpmovm2w %k0, %xmm0 1622; BITALG-NEXT: retq 1623 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 1624 %3 = icmp ugt <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1625 %4 = sext <8 x i1> %3 to <8 x i16> 1626 ret <8 x i16> %4 1627} 1628 1629define <8 x i16> @ult_2_v8i16(<8 x i16> %0) { 1630; SSE-LABEL: ult_2_v8i16: 1631; SSE: # %bb.0: 1632; SSE-NEXT: pcmpeqd %xmm1, %xmm1 1633; SSE-NEXT: paddw %xmm0, %xmm1 1634; SSE-NEXT: pand %xmm1, %xmm0 1635; SSE-NEXT: pxor %xmm1, %xmm1 1636; SSE-NEXT: pcmpeqw %xmm1, %xmm0 1637; SSE-NEXT: retq 1638; 1639; AVX-LABEL: ult_2_v8i16: 1640; AVX: # %bb.0: 1641; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1642; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm1 1643; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 1644; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1645; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1646; AVX-NEXT: retq 1647; 1648; BITALG_NOVLX-LABEL: ult_2_v8i16: 1649; BITALG_NOVLX: # %bb.0: 1650; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1651; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 1652; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2] 1653; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1654; BITALG_NOVLX-NEXT: vzeroupper 1655; BITALG_NOVLX-NEXT: retq 1656; 1657; BITALG-LABEL: ult_2_v8i16: 1658; BITALG: # %bb.0: 1659; BITALG-NEXT: vpopcntw %xmm0, %xmm0 1660; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 1661; BITALG-NEXT: vpmovm2w %k0, %xmm0 1662; BITALG-NEXT: retq 1663 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 1664 %3 = icmp ult <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 1665 %4 = sext <8 x i1> %3 to <8 x i16> 1666 ret <8 x i16> %4 1667} 1668 1669define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { 1670; SSE2-LABEL: ugt_2_v8i16: 1671; SSE2: # %bb.0: 1672; 
SSE2-NEXT: movdqa %xmm0, %xmm1 1673; SSE2-NEXT: psrlw $1, %xmm1 1674; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1675; SSE2-NEXT: psubb %xmm1, %xmm0 1676; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1677; SSE2-NEXT: movdqa %xmm0, %xmm2 1678; SSE2-NEXT: pand %xmm1, %xmm2 1679; SSE2-NEXT: psrlw $2, %xmm0 1680; SSE2-NEXT: pand %xmm1, %xmm0 1681; SSE2-NEXT: paddb %xmm2, %xmm0 1682; SSE2-NEXT: movdqa %xmm0, %xmm1 1683; SSE2-NEXT: psrlw $4, %xmm1 1684; SSE2-NEXT: paddb %xmm0, %xmm1 1685; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1686; SSE2-NEXT: movdqa %xmm1, %xmm0 1687; SSE2-NEXT: psllw $8, %xmm0 1688; SSE2-NEXT: paddb %xmm1, %xmm0 1689; SSE2-NEXT: psrlw $8, %xmm0 1690; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 1691; SSE2-NEXT: retq 1692; 1693; SSE3-LABEL: ugt_2_v8i16: 1694; SSE3: # %bb.0: 1695; SSE3-NEXT: movdqa %xmm0, %xmm1 1696; SSE3-NEXT: psrlw $1, %xmm1 1697; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1698; SSE3-NEXT: psubb %xmm1, %xmm0 1699; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1700; SSE3-NEXT: movdqa %xmm0, %xmm2 1701; SSE3-NEXT: pand %xmm1, %xmm2 1702; SSE3-NEXT: psrlw $2, %xmm0 1703; SSE3-NEXT: pand %xmm1, %xmm0 1704; SSE3-NEXT: paddb %xmm2, %xmm0 1705; SSE3-NEXT: movdqa %xmm0, %xmm1 1706; SSE3-NEXT: psrlw $4, %xmm1 1707; SSE3-NEXT: paddb %xmm0, %xmm1 1708; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1709; SSE3-NEXT: movdqa %xmm1, %xmm0 1710; SSE3-NEXT: psllw $8, %xmm0 1711; SSE3-NEXT: paddb %xmm1, %xmm0 1712; SSE3-NEXT: psrlw $8, %xmm0 1713; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 1714; SSE3-NEXT: retq 1715; 1716; SSSE3-LABEL: ugt_2_v8i16: 1717; SSSE3: # %bb.0: 1718; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1719; SSSE3-NEXT: movdqa %xmm0, %xmm2 1720; SSSE3-NEXT: pand %xmm1, %xmm2 1721; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1722; SSSE3-NEXT: movdqa %xmm3, %xmm4 1723; SSSE3-NEXT: pshufb %xmm2, %xmm4 1724; SSSE3-NEXT: psrlw $4, %xmm0 
1725; SSSE3-NEXT: pand %xmm1, %xmm0 1726; SSSE3-NEXT: pshufb %xmm0, %xmm3 1727; SSSE3-NEXT: paddb %xmm4, %xmm3 1728; SSSE3-NEXT: movdqa %xmm3, %xmm0 1729; SSSE3-NEXT: psllw $8, %xmm0 1730; SSSE3-NEXT: paddb %xmm3, %xmm0 1731; SSSE3-NEXT: psrlw $8, %xmm0 1732; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 1733; SSSE3-NEXT: retq 1734; 1735; SSE41-LABEL: ugt_2_v8i16: 1736; SSE41: # %bb.0: 1737; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1738; SSE41-NEXT: movdqa %xmm0, %xmm2 1739; SSE41-NEXT: pand %xmm1, %xmm2 1740; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1741; SSE41-NEXT: movdqa %xmm3, %xmm4 1742; SSE41-NEXT: pshufb %xmm2, %xmm4 1743; SSE41-NEXT: psrlw $4, %xmm0 1744; SSE41-NEXT: pand %xmm1, %xmm0 1745; SSE41-NEXT: pshufb %xmm0, %xmm3 1746; SSE41-NEXT: paddb %xmm4, %xmm3 1747; SSE41-NEXT: movdqa %xmm3, %xmm0 1748; SSE41-NEXT: psllw $8, %xmm0 1749; SSE41-NEXT: paddb %xmm3, %xmm0 1750; SSE41-NEXT: psrlw $8, %xmm0 1751; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 1752; SSE41-NEXT: retq 1753; 1754; AVX1-LABEL: ugt_2_v8i16: 1755; AVX1: # %bb.0: 1756; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1757; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 1758; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1759; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1760; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 1761; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1762; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1763; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1764; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 1765; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 1766; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 1767; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 1768; AVX1-NEXT: retq 1769; 1770; AVX2-LABEL: ugt_2_v8i16: 1771; AVX2: # %bb.0: 1772; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1773; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 1774; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1775; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1776; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 1777; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1778; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1779; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1780; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 1781; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 1782; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 1783; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 1784; AVX2-NEXT: retq 1785; 1786; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i16: 1787; AVX512VPOPCNTDQ: # %bb.0: 1788; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1789; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 1790; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 1791; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 1792; AVX512VPOPCNTDQ-NEXT: vzeroupper 1793; AVX512VPOPCNTDQ-NEXT: retq 1794; 1795; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i16: 1796; AVX512VPOPCNTDQVL: # %bb.0: 1797; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1798; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 1799; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 1800; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 1801; AVX512VPOPCNTDQVL-NEXT: vzeroupper 1802; AVX512VPOPCNTDQVL-NEXT: retq 1803; 1804; BITALG_NOVLX-LABEL: ugt_2_v8i16: 1805; BITALG_NOVLX: # %bb.0: 1806; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1807; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 1808; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 1809; BITALG_NOVLX-NEXT: vzeroupper 1810; BITALG_NOVLX-NEXT: retq 1811; 1812; BITALG-LABEL: ugt_2_v8i16: 1813; BITALG: # %bb.0: 1814; BITALG-NEXT: vpopcntw %xmm0, %xmm0 1815; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 1816; BITALG-NEXT: vpmovm2w %k0, %xmm0 1817; BITALG-NEXT: retq 1818 %2 = tail call <8 x i16> 
@llvm.ctpop.v8i16(<8 x i16> %0) 1819 %3 = icmp ugt <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 1820 %4 = sext <8 x i1> %3 to <8 x i16> 1821 ret <8 x i16> %4 1822} 1823 1824define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { 1825; SSE2-LABEL: ult_3_v8i16: 1826; SSE2: # %bb.0: 1827; SSE2-NEXT: movdqa %xmm0, %xmm1 1828; SSE2-NEXT: psrlw $1, %xmm1 1829; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1830; SSE2-NEXT: psubb %xmm1, %xmm0 1831; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1832; SSE2-NEXT: movdqa %xmm0, %xmm2 1833; SSE2-NEXT: pand %xmm1, %xmm2 1834; SSE2-NEXT: psrlw $2, %xmm0 1835; SSE2-NEXT: pand %xmm1, %xmm0 1836; SSE2-NEXT: paddb %xmm2, %xmm0 1837; SSE2-NEXT: movdqa %xmm0, %xmm1 1838; SSE2-NEXT: psrlw $4, %xmm1 1839; SSE2-NEXT: paddb %xmm0, %xmm1 1840; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1841; SSE2-NEXT: movdqa %xmm1, %xmm2 1842; SSE2-NEXT: psllw $8, %xmm2 1843; SSE2-NEXT: paddb %xmm1, %xmm2 1844; SSE2-NEXT: psrlw $8, %xmm2 1845; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] 1846; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 1847; SSE2-NEXT: retq 1848; 1849; SSE3-LABEL: ult_3_v8i16: 1850; SSE3: # %bb.0: 1851; SSE3-NEXT: movdqa %xmm0, %xmm1 1852; SSE3-NEXT: psrlw $1, %xmm1 1853; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1854; SSE3-NEXT: psubb %xmm1, %xmm0 1855; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1856; SSE3-NEXT: movdqa %xmm0, %xmm2 1857; SSE3-NEXT: pand %xmm1, %xmm2 1858; SSE3-NEXT: psrlw $2, %xmm0 1859; SSE3-NEXT: pand %xmm1, %xmm0 1860; SSE3-NEXT: paddb %xmm2, %xmm0 1861; SSE3-NEXT: movdqa %xmm0, %xmm1 1862; SSE3-NEXT: psrlw $4, %xmm1 1863; SSE3-NEXT: paddb %xmm0, %xmm1 1864; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 1865; SSE3-NEXT: movdqa %xmm1, %xmm2 1866; SSE3-NEXT: psllw $8, %xmm2 1867; SSE3-NEXT: paddb %xmm1, %xmm2 1868; SSE3-NEXT: psrlw $8, %xmm2 1869; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] 1870; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 1871; SSE3-NEXT: retq 1872; 
1873; SSSE3-LABEL: ult_3_v8i16: 1874; SSSE3: # %bb.0: 1875; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1876; SSSE3-NEXT: movdqa %xmm0, %xmm2 1877; SSSE3-NEXT: pand %xmm1, %xmm2 1878; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1879; SSSE3-NEXT: movdqa %xmm3, %xmm4 1880; SSSE3-NEXT: pshufb %xmm2, %xmm4 1881; SSSE3-NEXT: psrlw $4, %xmm0 1882; SSSE3-NEXT: pand %xmm1, %xmm0 1883; SSSE3-NEXT: pshufb %xmm0, %xmm3 1884; SSSE3-NEXT: paddb %xmm4, %xmm3 1885; SSSE3-NEXT: movdqa %xmm3, %xmm1 1886; SSSE3-NEXT: psllw $8, %xmm1 1887; SSSE3-NEXT: paddb %xmm3, %xmm1 1888; SSSE3-NEXT: psrlw $8, %xmm1 1889; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] 1890; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 1891; SSSE3-NEXT: retq 1892; 1893; SSE41-LABEL: ult_3_v8i16: 1894; SSE41: # %bb.0: 1895; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1896; SSE41-NEXT: movdqa %xmm0, %xmm2 1897; SSE41-NEXT: pand %xmm1, %xmm2 1898; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1899; SSE41-NEXT: movdqa %xmm3, %xmm4 1900; SSE41-NEXT: pshufb %xmm2, %xmm4 1901; SSE41-NEXT: psrlw $4, %xmm0 1902; SSE41-NEXT: pand %xmm1, %xmm0 1903; SSE41-NEXT: pshufb %xmm0, %xmm3 1904; SSE41-NEXT: paddb %xmm4, %xmm3 1905; SSE41-NEXT: movdqa %xmm3, %xmm1 1906; SSE41-NEXT: psllw $8, %xmm1 1907; SSE41-NEXT: paddb %xmm3, %xmm1 1908; SSE41-NEXT: psrlw $8, %xmm1 1909; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] 1910; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 1911; SSE41-NEXT: retq 1912; 1913; AVX1-LABEL: ult_3_v8i16: 1914; AVX1: # %bb.0: 1915; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1916; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 1917; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1918; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1919; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 1920; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1921; AVX1-NEXT: vpshufb %xmm0, %xmm3, 
%xmm0 1922; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1923; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 1924; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 1925; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 1926; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] 1927; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1928; AVX1-NEXT: retq 1929; 1930; AVX2-LABEL: ult_3_v8i16: 1931; AVX2: # %bb.0: 1932; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1933; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 1934; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1935; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 1936; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 1937; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1938; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 1939; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1940; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 1941; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 1942; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 1943; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] 1944; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1945; AVX2-NEXT: retq 1946; 1947; AVX512VPOPCNTDQ-LABEL: ult_3_v8i16: 1948; AVX512VPOPCNTDQ: # %bb.0: 1949; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1950; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 1951; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 1952; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] 1953; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1954; AVX512VPOPCNTDQ-NEXT: vzeroupper 1955; AVX512VPOPCNTDQ-NEXT: retq 1956; 1957; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i16: 1958; AVX512VPOPCNTDQVL: # %bb.0: 1959; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1960; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 1961; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 1962; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] 
1963; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1964; AVX512VPOPCNTDQVL-NEXT: vzeroupper 1965; AVX512VPOPCNTDQVL-NEXT: retq 1966; 1967; BITALG_NOVLX-LABEL: ult_3_v8i16: 1968; BITALG_NOVLX: # %bb.0: 1969; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1970; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 1971; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] 1972; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1973; BITALG_NOVLX-NEXT: vzeroupper 1974; BITALG_NOVLX-NEXT: retq 1975; 1976; BITALG-LABEL: ult_3_v8i16: 1977; BITALG: # %bb.0: 1978; BITALG-NEXT: vpopcntw %xmm0, %xmm0 1979; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 1980; BITALG-NEXT: vpmovm2w %k0, %xmm0 1981; BITALG-NEXT: retq 1982 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 1983 %3 = icmp ult <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 1984 %4 = sext <8 x i1> %3 to <8 x i16> 1985 ret <8 x i16> %4 1986} 1987 1988define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { 1989; SSE2-LABEL: ugt_3_v8i16: 1990; SSE2: # %bb.0: 1991; SSE2-NEXT: movdqa %xmm0, %xmm1 1992; SSE2-NEXT: psrlw $1, %xmm1 1993; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1994; SSE2-NEXT: psubb %xmm1, %xmm0 1995; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1996; SSE2-NEXT: movdqa %xmm0, %xmm2 1997; SSE2-NEXT: pand %xmm1, %xmm2 1998; SSE2-NEXT: psrlw $2, %xmm0 1999; SSE2-NEXT: pand %xmm1, %xmm0 2000; SSE2-NEXT: paddb %xmm2, %xmm0 2001; SSE2-NEXT: movdqa %xmm0, %xmm1 2002; SSE2-NEXT: psrlw $4, %xmm1 2003; SSE2-NEXT: paddb %xmm0, %xmm1 2004; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2005; SSE2-NEXT: movdqa %xmm1, %xmm0 2006; SSE2-NEXT: psllw $8, %xmm0 2007; SSE2-NEXT: paddb %xmm1, %xmm0 2008; SSE2-NEXT: psrlw $8, %xmm0 2009; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2010; SSE2-NEXT: retq 2011; 2012; SSE3-LABEL: ugt_3_v8i16: 2013; SSE3: # %bb.0: 2014; SSE3-NEXT: movdqa %xmm0, %xmm1 2015; SSE3-NEXT: psrlw $1, %xmm1 2016; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2017; 
SSE3-NEXT: psubb %xmm1, %xmm0 2018; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2019; SSE3-NEXT: movdqa %xmm0, %xmm2 2020; SSE3-NEXT: pand %xmm1, %xmm2 2021; SSE3-NEXT: psrlw $2, %xmm0 2022; SSE3-NEXT: pand %xmm1, %xmm0 2023; SSE3-NEXT: paddb %xmm2, %xmm0 2024; SSE3-NEXT: movdqa %xmm0, %xmm1 2025; SSE3-NEXT: psrlw $4, %xmm1 2026; SSE3-NEXT: paddb %xmm0, %xmm1 2027; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2028; SSE3-NEXT: movdqa %xmm1, %xmm0 2029; SSE3-NEXT: psllw $8, %xmm0 2030; SSE3-NEXT: paddb %xmm1, %xmm0 2031; SSE3-NEXT: psrlw $8, %xmm0 2032; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2033; SSE3-NEXT: retq 2034; 2035; SSSE3-LABEL: ugt_3_v8i16: 2036; SSSE3: # %bb.0: 2037; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2038; SSSE3-NEXT: movdqa %xmm0, %xmm2 2039; SSSE3-NEXT: pand %xmm1, %xmm2 2040; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2041; SSSE3-NEXT: movdqa %xmm3, %xmm4 2042; SSSE3-NEXT: pshufb %xmm2, %xmm4 2043; SSSE3-NEXT: psrlw $4, %xmm0 2044; SSSE3-NEXT: pand %xmm1, %xmm0 2045; SSSE3-NEXT: pshufb %xmm0, %xmm3 2046; SSSE3-NEXT: paddb %xmm4, %xmm3 2047; SSSE3-NEXT: movdqa %xmm3, %xmm0 2048; SSSE3-NEXT: psllw $8, %xmm0 2049; SSSE3-NEXT: paddb %xmm3, %xmm0 2050; SSSE3-NEXT: psrlw $8, %xmm0 2051; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2052; SSSE3-NEXT: retq 2053; 2054; SSE41-LABEL: ugt_3_v8i16: 2055; SSE41: # %bb.0: 2056; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2057; SSE41-NEXT: movdqa %xmm0, %xmm2 2058; SSE41-NEXT: pand %xmm1, %xmm2 2059; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2060; SSE41-NEXT: movdqa %xmm3, %xmm4 2061; SSE41-NEXT: pshufb %xmm2, %xmm4 2062; SSE41-NEXT: psrlw $4, %xmm0 2063; SSE41-NEXT: pand %xmm1, %xmm0 2064; SSE41-NEXT: pshufb %xmm0, %xmm3 2065; SSE41-NEXT: paddb %xmm4, %xmm3 2066; SSE41-NEXT: movdqa %xmm3, %xmm0 2067; SSE41-NEXT: psllw $8, %xmm0 2068; SSE41-NEXT: paddb 
%xmm3, %xmm0 2069; SSE41-NEXT: psrlw $8, %xmm0 2070; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2071; SSE41-NEXT: retq 2072; 2073; AVX1-LABEL: ugt_3_v8i16: 2074; AVX1: # %bb.0: 2075; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2076; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 2077; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2078; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2079; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 2080; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2081; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2082; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2083; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 2084; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2085; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 2086; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2087; AVX1-NEXT: retq 2088; 2089; AVX2-LABEL: ugt_3_v8i16: 2090; AVX2: # %bb.0: 2091; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2092; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 2093; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2094; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2095; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 2096; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2097; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2098; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2099; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 2100; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2101; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 2102; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2103; AVX2-NEXT: retq 2104; 2105; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i16: 2106; AVX512VPOPCNTDQ: # %bb.0: 2107; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2108; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 2109; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 2110; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2111; AVX512VPOPCNTDQ-NEXT: vzeroupper 2112; AVX512VPOPCNTDQ-NEXT: retq 2113; 2114; AVX512VPOPCNTDQVL-LABEL: 
ugt_3_v8i16: 2115; AVX512VPOPCNTDQVL: # %bb.0: 2116; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2117; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 2118; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 2119; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2120; AVX512VPOPCNTDQVL-NEXT: vzeroupper 2121; AVX512VPOPCNTDQVL-NEXT: retq 2122; 2123; BITALG_NOVLX-LABEL: ugt_3_v8i16: 2124; BITALG_NOVLX: # %bb.0: 2125; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2126; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 2127; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2128; BITALG_NOVLX-NEXT: vzeroupper 2129; BITALG_NOVLX-NEXT: retq 2130; 2131; BITALG-LABEL: ugt_3_v8i16: 2132; BITALG: # %bb.0: 2133; BITALG-NEXT: vpopcntw %xmm0, %xmm0 2134; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 2135; BITALG-NEXT: vpmovm2w %k0, %xmm0 2136; BITALG-NEXT: retq 2137 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 2138 %3 = icmp ugt <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 2139 %4 = sext <8 x i1> %3 to <8 x i16> 2140 ret <8 x i16> %4 2141} 2142 2143define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { 2144; SSE2-LABEL: ult_4_v8i16: 2145; SSE2: # %bb.0: 2146; SSE2-NEXT: movdqa %xmm0, %xmm1 2147; SSE2-NEXT: psrlw $1, %xmm1 2148; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2149; SSE2-NEXT: psubb %xmm1, %xmm0 2150; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2151; SSE2-NEXT: movdqa %xmm0, %xmm2 2152; SSE2-NEXT: pand %xmm1, %xmm2 2153; SSE2-NEXT: psrlw $2, %xmm0 2154; SSE2-NEXT: pand %xmm1, %xmm0 2155; SSE2-NEXT: paddb %xmm2, %xmm0 2156; SSE2-NEXT: movdqa %xmm0, %xmm1 2157; SSE2-NEXT: psrlw $4, %xmm1 2158; SSE2-NEXT: paddb %xmm0, %xmm1 2159; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2160; SSE2-NEXT: movdqa %xmm1, %xmm2 2161; SSE2-NEXT: psllw $8, %xmm2 2162; SSE2-NEXT: paddb %xmm1, %xmm2 2163; SSE2-NEXT: 
psrlw $8, %xmm2 2164; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] 2165; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 2166; SSE2-NEXT: retq 2167; 2168; SSE3-LABEL: ult_4_v8i16: 2169; SSE3: # %bb.0: 2170; SSE3-NEXT: movdqa %xmm0, %xmm1 2171; SSE3-NEXT: psrlw $1, %xmm1 2172; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2173; SSE3-NEXT: psubb %xmm1, %xmm0 2174; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2175; SSE3-NEXT: movdqa %xmm0, %xmm2 2176; SSE3-NEXT: pand %xmm1, %xmm2 2177; SSE3-NEXT: psrlw $2, %xmm0 2178; SSE3-NEXT: pand %xmm1, %xmm0 2179; SSE3-NEXT: paddb %xmm2, %xmm0 2180; SSE3-NEXT: movdqa %xmm0, %xmm1 2181; SSE3-NEXT: psrlw $4, %xmm1 2182; SSE3-NEXT: paddb %xmm0, %xmm1 2183; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2184; SSE3-NEXT: movdqa %xmm1, %xmm2 2185; SSE3-NEXT: psllw $8, %xmm2 2186; SSE3-NEXT: paddb %xmm1, %xmm2 2187; SSE3-NEXT: psrlw $8, %xmm2 2188; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] 2189; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 2190; SSE3-NEXT: retq 2191; 2192; SSSE3-LABEL: ult_4_v8i16: 2193; SSSE3: # %bb.0: 2194; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2195; SSSE3-NEXT: movdqa %xmm0, %xmm2 2196; SSSE3-NEXT: pand %xmm1, %xmm2 2197; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2198; SSSE3-NEXT: movdqa %xmm3, %xmm4 2199; SSSE3-NEXT: pshufb %xmm2, %xmm4 2200; SSSE3-NEXT: psrlw $4, %xmm0 2201; SSSE3-NEXT: pand %xmm1, %xmm0 2202; SSSE3-NEXT: pshufb %xmm0, %xmm3 2203; SSSE3-NEXT: paddb %xmm4, %xmm3 2204; SSSE3-NEXT: movdqa %xmm3, %xmm1 2205; SSSE3-NEXT: psllw $8, %xmm1 2206; SSSE3-NEXT: paddb %xmm3, %xmm1 2207; SSSE3-NEXT: psrlw $8, %xmm1 2208; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] 2209; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 2210; SSSE3-NEXT: retq 2211; 2212; SSE41-LABEL: ult_4_v8i16: 2213; SSE41: # %bb.0: 2214; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2215; SSE41-NEXT: movdqa %xmm0, %xmm2 2216; 
SSE41-NEXT: pand %xmm1, %xmm2 2217; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2218; SSE41-NEXT: movdqa %xmm3, %xmm4 2219; SSE41-NEXT: pshufb %xmm2, %xmm4 2220; SSE41-NEXT: psrlw $4, %xmm0 2221; SSE41-NEXT: pand %xmm1, %xmm0 2222; SSE41-NEXT: pshufb %xmm0, %xmm3 2223; SSE41-NEXT: paddb %xmm4, %xmm3 2224; SSE41-NEXT: movdqa %xmm3, %xmm1 2225; SSE41-NEXT: psllw $8, %xmm1 2226; SSE41-NEXT: paddb %xmm3, %xmm1 2227; SSE41-NEXT: psrlw $8, %xmm1 2228; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] 2229; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 2230; SSE41-NEXT: retq 2231; 2232; AVX1-LABEL: ult_4_v8i16: 2233; AVX1: # %bb.0: 2234; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2235; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 2236; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2237; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2238; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 2239; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2240; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2241; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2242; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 2243; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2244; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 2245; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] 2246; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2247; AVX1-NEXT: retq 2248; 2249; AVX2-LABEL: ult_4_v8i16: 2250; AVX2: # %bb.0: 2251; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2252; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 2253; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2254; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2255; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 2256; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2257; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2258; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2259; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 2260; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2261; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 2262; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] 
2263; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2264; AVX2-NEXT: retq 2265; 2266; AVX512VPOPCNTDQ-LABEL: ult_4_v8i16: 2267; AVX512VPOPCNTDQ: # %bb.0: 2268; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2269; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 2270; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 2271; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] 2272; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2273; AVX512VPOPCNTDQ-NEXT: vzeroupper 2274; AVX512VPOPCNTDQ-NEXT: retq 2275; 2276; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i16: 2277; AVX512VPOPCNTDQVL: # %bb.0: 2278; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2279; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 2280; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 2281; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] 2282; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2283; AVX512VPOPCNTDQVL-NEXT: vzeroupper 2284; AVX512VPOPCNTDQVL-NEXT: retq 2285; 2286; BITALG_NOVLX-LABEL: ult_4_v8i16: 2287; BITALG_NOVLX: # %bb.0: 2288; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2289; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 2290; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] 2291; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2292; BITALG_NOVLX-NEXT: vzeroupper 2293; BITALG_NOVLX-NEXT: retq 2294; 2295; BITALG-LABEL: ult_4_v8i16: 2296; BITALG: # %bb.0: 2297; BITALG-NEXT: vpopcntw %xmm0, %xmm0 2298; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 2299; BITALG-NEXT: vpmovm2w %k0, %xmm0 2300; BITALG-NEXT: retq 2301 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 2302 %3 = icmp ult <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4> 2303 %4 = sext <8 x i1> %3 to <8 x i16> 2304 ret <8 x i16> %4 2305} 2306 2307define <8 x i16> 
@ugt_4_v8i16(<8 x i16> %0) { 2308; SSE2-LABEL: ugt_4_v8i16: 2309; SSE2: # %bb.0: 2310; SSE2-NEXT: movdqa %xmm0, %xmm1 2311; SSE2-NEXT: psrlw $1, %xmm1 2312; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2313; SSE2-NEXT: psubb %xmm1, %xmm0 2314; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2315; SSE2-NEXT: movdqa %xmm0, %xmm2 2316; SSE2-NEXT: pand %xmm1, %xmm2 2317; SSE2-NEXT: psrlw $2, %xmm0 2318; SSE2-NEXT: pand %xmm1, %xmm0 2319; SSE2-NEXT: paddb %xmm2, %xmm0 2320; SSE2-NEXT: movdqa %xmm0, %xmm1 2321; SSE2-NEXT: psrlw $4, %xmm1 2322; SSE2-NEXT: paddb %xmm0, %xmm1 2323; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2324; SSE2-NEXT: movdqa %xmm1, %xmm0 2325; SSE2-NEXT: psllw $8, %xmm0 2326; SSE2-NEXT: paddb %xmm1, %xmm0 2327; SSE2-NEXT: psrlw $8, %xmm0 2328; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2329; SSE2-NEXT: retq 2330; 2331; SSE3-LABEL: ugt_4_v8i16: 2332; SSE3: # %bb.0: 2333; SSE3-NEXT: movdqa %xmm0, %xmm1 2334; SSE3-NEXT: psrlw $1, %xmm1 2335; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2336; SSE3-NEXT: psubb %xmm1, %xmm0 2337; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2338; SSE3-NEXT: movdqa %xmm0, %xmm2 2339; SSE3-NEXT: pand %xmm1, %xmm2 2340; SSE3-NEXT: psrlw $2, %xmm0 2341; SSE3-NEXT: pand %xmm1, %xmm0 2342; SSE3-NEXT: paddb %xmm2, %xmm0 2343; SSE3-NEXT: movdqa %xmm0, %xmm1 2344; SSE3-NEXT: psrlw $4, %xmm1 2345; SSE3-NEXT: paddb %xmm0, %xmm1 2346; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2347; SSE3-NEXT: movdqa %xmm1, %xmm0 2348; SSE3-NEXT: psllw $8, %xmm0 2349; SSE3-NEXT: paddb %xmm1, %xmm0 2350; SSE3-NEXT: psrlw $8, %xmm0 2351; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2352; SSE3-NEXT: retq 2353; 2354; SSSE3-LABEL: ugt_4_v8i16: 2355; SSSE3: # %bb.0: 2356; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2357; SSSE3-NEXT: movdqa %xmm0, %xmm2 2358; SSSE3-NEXT: pand %xmm1, %xmm2 2359; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2360; SSSE3-NEXT: movdqa 
%xmm3, %xmm4 2361; SSSE3-NEXT: pshufb %xmm2, %xmm4 2362; SSSE3-NEXT: psrlw $4, %xmm0 2363; SSSE3-NEXT: pand %xmm1, %xmm0 2364; SSSE3-NEXT: pshufb %xmm0, %xmm3 2365; SSSE3-NEXT: paddb %xmm4, %xmm3 2366; SSSE3-NEXT: movdqa %xmm3, %xmm0 2367; SSSE3-NEXT: psllw $8, %xmm0 2368; SSSE3-NEXT: paddb %xmm3, %xmm0 2369; SSSE3-NEXT: psrlw $8, %xmm0 2370; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2371; SSSE3-NEXT: retq 2372; 2373; SSE41-LABEL: ugt_4_v8i16: 2374; SSE41: # %bb.0: 2375; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2376; SSE41-NEXT: movdqa %xmm0, %xmm2 2377; SSE41-NEXT: pand %xmm1, %xmm2 2378; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2379; SSE41-NEXT: movdqa %xmm3, %xmm4 2380; SSE41-NEXT: pshufb %xmm2, %xmm4 2381; SSE41-NEXT: psrlw $4, %xmm0 2382; SSE41-NEXT: pand %xmm1, %xmm0 2383; SSE41-NEXT: pshufb %xmm0, %xmm3 2384; SSE41-NEXT: paddb %xmm4, %xmm3 2385; SSE41-NEXT: movdqa %xmm3, %xmm0 2386; SSE41-NEXT: psllw $8, %xmm0 2387; SSE41-NEXT: paddb %xmm3, %xmm0 2388; SSE41-NEXT: psrlw $8, %xmm0 2389; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2390; SSE41-NEXT: retq 2391; 2392; AVX1-LABEL: ugt_4_v8i16: 2393; AVX1: # %bb.0: 2394; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2395; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 2396; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2397; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2398; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 2399; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2400; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2401; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2402; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 2403; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2404; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 2405; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2406; AVX1-NEXT: retq 2407; 2408; AVX2-LABEL: ugt_4_v8i16: 2409; AVX2: # %bb.0: 2410; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2411; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 2412; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2413; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2414; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 2415; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2416; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2417; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2418; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 2419; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2420; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 2421; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2422; AVX2-NEXT: retq 2423; 2424; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i16: 2425; AVX512VPOPCNTDQ: # %bb.0: 2426; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2427; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 2428; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 2429; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2430; AVX512VPOPCNTDQ-NEXT: vzeroupper 2431; AVX512VPOPCNTDQ-NEXT: retq 2432; 2433; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i16: 2434; AVX512VPOPCNTDQVL: # %bb.0: 2435; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2436; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 2437; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 2438; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2439; AVX512VPOPCNTDQVL-NEXT: vzeroupper 2440; AVX512VPOPCNTDQVL-NEXT: retq 2441; 2442; BITALG_NOVLX-LABEL: ugt_4_v8i16: 2443; BITALG_NOVLX: # %bb.0: 2444; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2445; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 2446; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2447; BITALG_NOVLX-NEXT: vzeroupper 2448; BITALG_NOVLX-NEXT: retq 2449; 2450; BITALG-LABEL: ugt_4_v8i16: 2451; BITALG: # %bb.0: 2452; BITALG-NEXT: vpopcntw %xmm0, %xmm0 2453; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 2454; BITALG-NEXT: vpmovm2w %k0, %xmm0 2455; 
BITALG-NEXT: retq 2456 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 2457 %3 = icmp ugt <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4> 2458 %4 = sext <8 x i1> %3 to <8 x i16> 2459 ret <8 x i16> %4 2460} 2461 2462define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { 2463; SSE2-LABEL: ult_5_v8i16: 2464; SSE2: # %bb.0: 2465; SSE2-NEXT: movdqa %xmm0, %xmm1 2466; SSE2-NEXT: psrlw $1, %xmm1 2467; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2468; SSE2-NEXT: psubb %xmm1, %xmm0 2469; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2470; SSE2-NEXT: movdqa %xmm0, %xmm2 2471; SSE2-NEXT: pand %xmm1, %xmm2 2472; SSE2-NEXT: psrlw $2, %xmm0 2473; SSE2-NEXT: pand %xmm1, %xmm0 2474; SSE2-NEXT: paddb %xmm2, %xmm0 2475; SSE2-NEXT: movdqa %xmm0, %xmm1 2476; SSE2-NEXT: psrlw $4, %xmm1 2477; SSE2-NEXT: paddb %xmm0, %xmm1 2478; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2479; SSE2-NEXT: movdqa %xmm1, %xmm2 2480; SSE2-NEXT: psllw $8, %xmm2 2481; SSE2-NEXT: paddb %xmm1, %xmm2 2482; SSE2-NEXT: psrlw $8, %xmm2 2483; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] 2484; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 2485; SSE2-NEXT: retq 2486; 2487; SSE3-LABEL: ult_5_v8i16: 2488; SSE3: # %bb.0: 2489; SSE3-NEXT: movdqa %xmm0, %xmm1 2490; SSE3-NEXT: psrlw $1, %xmm1 2491; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2492; SSE3-NEXT: psubb %xmm1, %xmm0 2493; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2494; SSE3-NEXT: movdqa %xmm0, %xmm2 2495; SSE3-NEXT: pand %xmm1, %xmm2 2496; SSE3-NEXT: psrlw $2, %xmm0 2497; SSE3-NEXT: pand %xmm1, %xmm0 2498; SSE3-NEXT: paddb %xmm2, %xmm0 2499; SSE3-NEXT: movdqa %xmm0, %xmm1 2500; SSE3-NEXT: psrlw $4, %xmm1 2501; SSE3-NEXT: paddb %xmm0, %xmm1 2502; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2503; SSE3-NEXT: movdqa %xmm1, %xmm2 2504; SSE3-NEXT: psllw $8, %xmm2 2505; SSE3-NEXT: paddb %xmm1, %xmm2 2506; SSE3-NEXT: psrlw $8, %xmm2 2507; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] 2508; SSE3-NEXT: 
pcmpgtw %xmm2, %xmm0 2509; SSE3-NEXT: retq 2510; 2511; SSSE3-LABEL: ult_5_v8i16: 2512; SSSE3: # %bb.0: 2513; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2514; SSSE3-NEXT: movdqa %xmm0, %xmm2 2515; SSSE3-NEXT: pand %xmm1, %xmm2 2516; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2517; SSSE3-NEXT: movdqa %xmm3, %xmm4 2518; SSSE3-NEXT: pshufb %xmm2, %xmm4 2519; SSSE3-NEXT: psrlw $4, %xmm0 2520; SSSE3-NEXT: pand %xmm1, %xmm0 2521; SSSE3-NEXT: pshufb %xmm0, %xmm3 2522; SSSE3-NEXT: paddb %xmm4, %xmm3 2523; SSSE3-NEXT: movdqa %xmm3, %xmm1 2524; SSSE3-NEXT: psllw $8, %xmm1 2525; SSSE3-NEXT: paddb %xmm3, %xmm1 2526; SSSE3-NEXT: psrlw $8, %xmm1 2527; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] 2528; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 2529; SSSE3-NEXT: retq 2530; 2531; SSE41-LABEL: ult_5_v8i16: 2532; SSE41: # %bb.0: 2533; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2534; SSE41-NEXT: movdqa %xmm0, %xmm2 2535; SSE41-NEXT: pand %xmm1, %xmm2 2536; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2537; SSE41-NEXT: movdqa %xmm3, %xmm4 2538; SSE41-NEXT: pshufb %xmm2, %xmm4 2539; SSE41-NEXT: psrlw $4, %xmm0 2540; SSE41-NEXT: pand %xmm1, %xmm0 2541; SSE41-NEXT: pshufb %xmm0, %xmm3 2542; SSE41-NEXT: paddb %xmm4, %xmm3 2543; SSE41-NEXT: movdqa %xmm3, %xmm1 2544; SSE41-NEXT: psllw $8, %xmm1 2545; SSE41-NEXT: paddb %xmm3, %xmm1 2546; SSE41-NEXT: psrlw $8, %xmm1 2547; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] 2548; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 2549; SSE41-NEXT: retq 2550; 2551; AVX1-LABEL: ult_5_v8i16: 2552; AVX1: # %bb.0: 2553; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2554; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 2555; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2556; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2557; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 2558; AVX1-NEXT: vpand %xmm1, 
%xmm0, %xmm0 2559; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2560; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2561; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 2562; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2563; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 2564; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] 2565; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2566; AVX1-NEXT: retq 2567; 2568; AVX2-LABEL: ult_5_v8i16: 2569; AVX2: # %bb.0: 2570; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2571; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 2572; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2573; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2574; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 2575; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2576; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2577; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2578; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 2579; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2580; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 2581; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] 2582; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2583; AVX2-NEXT: retq 2584; 2585; AVX512VPOPCNTDQ-LABEL: ult_5_v8i16: 2586; AVX512VPOPCNTDQ: # %bb.0: 2587; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2588; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 2589; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 2590; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] 2591; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2592; AVX512VPOPCNTDQ-NEXT: vzeroupper 2593; AVX512VPOPCNTDQ-NEXT: retq 2594; 2595; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i16: 2596; AVX512VPOPCNTDQVL: # %bb.0: 2597; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2598; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 2599; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 2600; 
AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] 2601; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2602; AVX512VPOPCNTDQVL-NEXT: vzeroupper 2603; AVX512VPOPCNTDQVL-NEXT: retq 2604; 2605; BITALG_NOVLX-LABEL: ult_5_v8i16: 2606; BITALG_NOVLX: # %bb.0: 2607; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2608; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 2609; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] 2610; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2611; BITALG_NOVLX-NEXT: vzeroupper 2612; BITALG_NOVLX-NEXT: retq 2613; 2614; BITALG-LABEL: ult_5_v8i16: 2615; BITALG: # %bb.0: 2616; BITALG-NEXT: vpopcntw %xmm0, %xmm0 2617; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 2618; BITALG-NEXT: vpmovm2w %k0, %xmm0 2619; BITALG-NEXT: retq 2620 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 2621 %3 = icmp ult <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> 2622 %4 = sext <8 x i1> %3 to <8 x i16> 2623 ret <8 x i16> %4 2624} 2625 2626define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { 2627; SSE2-LABEL: ugt_5_v8i16: 2628; SSE2: # %bb.0: 2629; SSE2-NEXT: movdqa %xmm0, %xmm1 2630; SSE2-NEXT: psrlw $1, %xmm1 2631; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2632; SSE2-NEXT: psubb %xmm1, %xmm0 2633; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2634; SSE2-NEXT: movdqa %xmm0, %xmm2 2635; SSE2-NEXT: pand %xmm1, %xmm2 2636; SSE2-NEXT: psrlw $2, %xmm0 2637; SSE2-NEXT: pand %xmm1, %xmm0 2638; SSE2-NEXT: paddb %xmm2, %xmm0 2639; SSE2-NEXT: movdqa %xmm0, %xmm1 2640; SSE2-NEXT: psrlw $4, %xmm1 2641; SSE2-NEXT: paddb %xmm0, %xmm1 2642; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2643; SSE2-NEXT: movdqa %xmm1, %xmm0 2644; SSE2-NEXT: psllw $8, %xmm0 2645; SSE2-NEXT: paddb %xmm1, %xmm0 2646; SSE2-NEXT: psrlw $8, %xmm0 2647; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2648; SSE2-NEXT: retq 2649; 2650; SSE3-LABEL: ugt_5_v8i16: 2651; SSE3: # %bb.0: 2652; SSE3-NEXT: movdqa %xmm0, %xmm1 2653; 
SSE3-NEXT: psrlw $1, %xmm1 2654; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2655; SSE3-NEXT: psubb %xmm1, %xmm0 2656; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2657; SSE3-NEXT: movdqa %xmm0, %xmm2 2658; SSE3-NEXT: pand %xmm1, %xmm2 2659; SSE3-NEXT: psrlw $2, %xmm0 2660; SSE3-NEXT: pand %xmm1, %xmm0 2661; SSE3-NEXT: paddb %xmm2, %xmm0 2662; SSE3-NEXT: movdqa %xmm0, %xmm1 2663; SSE3-NEXT: psrlw $4, %xmm1 2664; SSE3-NEXT: paddb %xmm0, %xmm1 2665; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2666; SSE3-NEXT: movdqa %xmm1, %xmm0 2667; SSE3-NEXT: psllw $8, %xmm0 2668; SSE3-NEXT: paddb %xmm1, %xmm0 2669; SSE3-NEXT: psrlw $8, %xmm0 2670; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2671; SSE3-NEXT: retq 2672; 2673; SSSE3-LABEL: ugt_5_v8i16: 2674; SSSE3: # %bb.0: 2675; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2676; SSSE3-NEXT: movdqa %xmm0, %xmm2 2677; SSSE3-NEXT: pand %xmm1, %xmm2 2678; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2679; SSSE3-NEXT: movdqa %xmm3, %xmm4 2680; SSSE3-NEXT: pshufb %xmm2, %xmm4 2681; SSSE3-NEXT: psrlw $4, %xmm0 2682; SSSE3-NEXT: pand %xmm1, %xmm0 2683; SSSE3-NEXT: pshufb %xmm0, %xmm3 2684; SSSE3-NEXT: paddb %xmm4, %xmm3 2685; SSSE3-NEXT: movdqa %xmm3, %xmm0 2686; SSSE3-NEXT: psllw $8, %xmm0 2687; SSSE3-NEXT: paddb %xmm3, %xmm0 2688; SSSE3-NEXT: psrlw $8, %xmm0 2689; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2690; SSSE3-NEXT: retq 2691; 2692; SSE41-LABEL: ugt_5_v8i16: 2693; SSE41: # %bb.0: 2694; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2695; SSE41-NEXT: movdqa %xmm0, %xmm2 2696; SSE41-NEXT: pand %xmm1, %xmm2 2697; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2698; SSE41-NEXT: movdqa %xmm3, %xmm4 2699; SSE41-NEXT: pshufb %xmm2, %xmm4 2700; SSE41-NEXT: psrlw $4, %xmm0 2701; SSE41-NEXT: pand %xmm1, %xmm0 2702; SSE41-NEXT: pshufb %xmm0, %xmm3 2703; SSE41-NEXT: paddb %xmm4, %xmm3 2704; SSE41-NEXT: 
movdqa %xmm3, %xmm0 2705; SSE41-NEXT: psllw $8, %xmm0 2706; SSE41-NEXT: paddb %xmm3, %xmm0 2707; SSE41-NEXT: psrlw $8, %xmm0 2708; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2709; SSE41-NEXT: retq 2710; 2711; AVX1-LABEL: ugt_5_v8i16: 2712; AVX1: # %bb.0: 2713; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2714; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 2715; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2716; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2717; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 2718; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2719; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2720; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2721; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 2722; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2723; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 2724; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2725; AVX1-NEXT: retq 2726; 2727; AVX2-LABEL: ugt_5_v8i16: 2728; AVX2: # %bb.0: 2729; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2730; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 2731; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2732; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2733; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 2734; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2735; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2736; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2737; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 2738; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2739; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 2740; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2741; AVX2-NEXT: retq 2742; 2743; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i16: 2744; AVX512VPOPCNTDQ: # %bb.0: 2745; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2746; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 2747; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 2748; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2749; AVX512VPOPCNTDQ-NEXT: 
vzeroupper 2750; AVX512VPOPCNTDQ-NEXT: retq 2751; 2752; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i16: 2753; AVX512VPOPCNTDQVL: # %bb.0: 2754; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2755; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 2756; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 2757; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2758; AVX512VPOPCNTDQVL-NEXT: vzeroupper 2759; AVX512VPOPCNTDQVL-NEXT: retq 2760; 2761; BITALG_NOVLX-LABEL: ugt_5_v8i16: 2762; BITALG_NOVLX: # %bb.0: 2763; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2764; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 2765; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 2766; BITALG_NOVLX-NEXT: vzeroupper 2767; BITALG_NOVLX-NEXT: retq 2768; 2769; BITALG-LABEL: ugt_5_v8i16: 2770; BITALG: # %bb.0: 2771; BITALG-NEXT: vpopcntw %xmm0, %xmm0 2772; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 2773; BITALG-NEXT: vpmovm2w %k0, %xmm0 2774; BITALG-NEXT: retq 2775 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 2776 %3 = icmp ugt <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> 2777 %4 = sext <8 x i1> %3 to <8 x i16> 2778 ret <8 x i16> %4 2779} 2780 2781define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { 2782; SSE2-LABEL: ult_6_v8i16: 2783; SSE2: # %bb.0: 2784; SSE2-NEXT: movdqa %xmm0, %xmm1 2785; SSE2-NEXT: psrlw $1, %xmm1 2786; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2787; SSE2-NEXT: psubb %xmm1, %xmm0 2788; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2789; SSE2-NEXT: movdqa %xmm0, %xmm2 2790; SSE2-NEXT: pand %xmm1, %xmm2 2791; SSE2-NEXT: psrlw $2, %xmm0 2792; SSE2-NEXT: pand %xmm1, %xmm0 2793; SSE2-NEXT: paddb %xmm2, %xmm0 2794; SSE2-NEXT: movdqa %xmm0, %xmm1 2795; SSE2-NEXT: psrlw $4, %xmm1 2796; SSE2-NEXT: paddb %xmm0, %xmm1 2797; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2798; SSE2-NEXT: movdqa %xmm1, %xmm2 2799; 
SSE2-NEXT: psllw $8, %xmm2 2800; SSE2-NEXT: paddb %xmm1, %xmm2 2801; SSE2-NEXT: psrlw $8, %xmm2 2802; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] 2803; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 2804; SSE2-NEXT: retq 2805; 2806; SSE3-LABEL: ult_6_v8i16: 2807; SSE3: # %bb.0: 2808; SSE3-NEXT: movdqa %xmm0, %xmm1 2809; SSE3-NEXT: psrlw $1, %xmm1 2810; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2811; SSE3-NEXT: psubb %xmm1, %xmm0 2812; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2813; SSE3-NEXT: movdqa %xmm0, %xmm2 2814; SSE3-NEXT: pand %xmm1, %xmm2 2815; SSE3-NEXT: psrlw $2, %xmm0 2816; SSE3-NEXT: pand %xmm1, %xmm0 2817; SSE3-NEXT: paddb %xmm2, %xmm0 2818; SSE3-NEXT: movdqa %xmm0, %xmm1 2819; SSE3-NEXT: psrlw $4, %xmm1 2820; SSE3-NEXT: paddb %xmm0, %xmm1 2821; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2822; SSE3-NEXT: movdqa %xmm1, %xmm2 2823; SSE3-NEXT: psllw $8, %xmm2 2824; SSE3-NEXT: paddb %xmm1, %xmm2 2825; SSE3-NEXT: psrlw $8, %xmm2 2826; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] 2827; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 2828; SSE3-NEXT: retq 2829; 2830; SSSE3-LABEL: ult_6_v8i16: 2831; SSSE3: # %bb.0: 2832; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2833; SSSE3-NEXT: movdqa %xmm0, %xmm2 2834; SSSE3-NEXT: pand %xmm1, %xmm2 2835; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2836; SSSE3-NEXT: movdqa %xmm3, %xmm4 2837; SSSE3-NEXT: pshufb %xmm2, %xmm4 2838; SSSE3-NEXT: psrlw $4, %xmm0 2839; SSSE3-NEXT: pand %xmm1, %xmm0 2840; SSSE3-NEXT: pshufb %xmm0, %xmm3 2841; SSSE3-NEXT: paddb %xmm4, %xmm3 2842; SSSE3-NEXT: movdqa %xmm3, %xmm1 2843; SSSE3-NEXT: psllw $8, %xmm1 2844; SSSE3-NEXT: paddb %xmm3, %xmm1 2845; SSSE3-NEXT: psrlw $8, %xmm1 2846; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] 2847; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 2848; SSSE3-NEXT: retq 2849; 2850; SSE41-LABEL: ult_6_v8i16: 2851; SSE41: # %bb.0: 2852; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2853; SSE41-NEXT: movdqa %xmm0, %xmm2 2854; SSE41-NEXT: pand %xmm1, %xmm2 2855; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2856; SSE41-NEXT: movdqa %xmm3, %xmm4 2857; SSE41-NEXT: pshufb %xmm2, %xmm4 2858; SSE41-NEXT: psrlw $4, %xmm0 2859; SSE41-NEXT: pand %xmm1, %xmm0 2860; SSE41-NEXT: pshufb %xmm0, %xmm3 2861; SSE41-NEXT: paddb %xmm4, %xmm3 2862; SSE41-NEXT: movdqa %xmm3, %xmm1 2863; SSE41-NEXT: psllw $8, %xmm1 2864; SSE41-NEXT: paddb %xmm3, %xmm1 2865; SSE41-NEXT: psrlw $8, %xmm1 2866; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] 2867; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 2868; SSE41-NEXT: retq 2869; 2870; AVX1-LABEL: ult_6_v8i16: 2871; AVX1: # %bb.0: 2872; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2873; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 2874; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2875; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2876; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 2877; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2878; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2879; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2880; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 2881; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2882; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 2883; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] 2884; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2885; AVX1-NEXT: retq 2886; 2887; AVX2-LABEL: ult_6_v8i16: 2888; AVX2: # %bb.0: 2889; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2890; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 2891; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2892; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 2893; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 2894; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2895; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 2896; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2897; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 2898; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 2899; 
AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 2900; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] 2901; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2902; AVX2-NEXT: retq 2903; 2904; AVX512VPOPCNTDQ-LABEL: ult_6_v8i16: 2905; AVX512VPOPCNTDQ: # %bb.0: 2906; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2907; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 2908; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 2909; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] 2910; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2911; AVX512VPOPCNTDQ-NEXT: vzeroupper 2912; AVX512VPOPCNTDQ-NEXT: retq 2913; 2914; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i16: 2915; AVX512VPOPCNTDQVL: # %bb.0: 2916; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2917; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 2918; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 2919; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] 2920; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2921; AVX512VPOPCNTDQVL-NEXT: vzeroupper 2922; AVX512VPOPCNTDQVL-NEXT: retq 2923; 2924; BITALG_NOVLX-LABEL: ult_6_v8i16: 2925; BITALG_NOVLX: # %bb.0: 2926; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2927; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 2928; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] 2929; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 2930; BITALG_NOVLX-NEXT: vzeroupper 2931; BITALG_NOVLX-NEXT: retq 2932; 2933; BITALG-LABEL: ult_6_v8i16: 2934; BITALG: # %bb.0: 2935; BITALG-NEXT: vpopcntw %xmm0, %xmm0 2936; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 2937; BITALG-NEXT: vpmovm2w %k0, %xmm0 2938; BITALG-NEXT: retq 2939 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 2940 %3 = icmp ult <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6> 
2941 %4 = sext <8 x i1> %3 to <8 x i16> 2942 ret <8 x i16> %4 2943} 2944 2945define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { 2946; SSE2-LABEL: ugt_6_v8i16: 2947; SSE2: # %bb.0: 2948; SSE2-NEXT: movdqa %xmm0, %xmm1 2949; SSE2-NEXT: psrlw $1, %xmm1 2950; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2951; SSE2-NEXT: psubb %xmm1, %xmm0 2952; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2953; SSE2-NEXT: movdqa %xmm0, %xmm2 2954; SSE2-NEXT: pand %xmm1, %xmm2 2955; SSE2-NEXT: psrlw $2, %xmm0 2956; SSE2-NEXT: pand %xmm1, %xmm0 2957; SSE2-NEXT: paddb %xmm2, %xmm0 2958; SSE2-NEXT: movdqa %xmm0, %xmm1 2959; SSE2-NEXT: psrlw $4, %xmm1 2960; SSE2-NEXT: paddb %xmm0, %xmm1 2961; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2962; SSE2-NEXT: movdqa %xmm1, %xmm0 2963; SSE2-NEXT: psllw $8, %xmm0 2964; SSE2-NEXT: paddb %xmm1, %xmm0 2965; SSE2-NEXT: psrlw $8, %xmm0 2966; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2967; SSE2-NEXT: retq 2968; 2969; SSE3-LABEL: ugt_6_v8i16: 2970; SSE3: # %bb.0: 2971; SSE3-NEXT: movdqa %xmm0, %xmm1 2972; SSE3-NEXT: psrlw $1, %xmm1 2973; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2974; SSE3-NEXT: psubb %xmm1, %xmm0 2975; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 2976; SSE3-NEXT: movdqa %xmm0, %xmm2 2977; SSE3-NEXT: pand %xmm1, %xmm2 2978; SSE3-NEXT: psrlw $2, %xmm0 2979; SSE3-NEXT: pand %xmm1, %xmm0 2980; SSE3-NEXT: paddb %xmm2, %xmm0 2981; SSE3-NEXT: movdqa %xmm0, %xmm1 2982; SSE3-NEXT: psrlw $4, %xmm1 2983; SSE3-NEXT: paddb %xmm0, %xmm1 2984; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 2985; SSE3-NEXT: movdqa %xmm1, %xmm0 2986; SSE3-NEXT: psllw $8, %xmm0 2987; SSE3-NEXT: paddb %xmm1, %xmm0 2988; SSE3-NEXT: psrlw $8, %xmm0 2989; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 2990; SSE3-NEXT: retq 2991; 2992; SSSE3-LABEL: ugt_6_v8i16: 2993; SSSE3: # %bb.0: 2994; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 2995; SSSE3-NEXT: movdqa %xmm0, %xmm2 2996; SSSE3-NEXT: pand %xmm1, %xmm2 2997; 
SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 2998; SSSE3-NEXT: movdqa %xmm3, %xmm4 2999; SSSE3-NEXT: pshufb %xmm2, %xmm4 3000; SSSE3-NEXT: psrlw $4, %xmm0 3001; SSSE3-NEXT: pand %xmm1, %xmm0 3002; SSSE3-NEXT: pshufb %xmm0, %xmm3 3003; SSSE3-NEXT: paddb %xmm4, %xmm3 3004; SSSE3-NEXT: movdqa %xmm3, %xmm0 3005; SSSE3-NEXT: psllw $8, %xmm0 3006; SSSE3-NEXT: paddb %xmm3, %xmm0 3007; SSSE3-NEXT: psrlw $8, %xmm0 3008; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3009; SSSE3-NEXT: retq 3010; 3011; SSE41-LABEL: ugt_6_v8i16: 3012; SSE41: # %bb.0: 3013; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3014; SSE41-NEXT: movdqa %xmm0, %xmm2 3015; SSE41-NEXT: pand %xmm1, %xmm2 3016; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3017; SSE41-NEXT: movdqa %xmm3, %xmm4 3018; SSE41-NEXT: pshufb %xmm2, %xmm4 3019; SSE41-NEXT: psrlw $4, %xmm0 3020; SSE41-NEXT: pand %xmm1, %xmm0 3021; SSE41-NEXT: pshufb %xmm0, %xmm3 3022; SSE41-NEXT: paddb %xmm4, %xmm3 3023; SSE41-NEXT: movdqa %xmm3, %xmm0 3024; SSE41-NEXT: psllw $8, %xmm0 3025; SSE41-NEXT: paddb %xmm3, %xmm0 3026; SSE41-NEXT: psrlw $8, %xmm0 3027; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3028; SSE41-NEXT: retq 3029; 3030; AVX1-LABEL: ugt_6_v8i16: 3031; AVX1: # %bb.0: 3032; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3033; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 3034; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3035; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3036; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 3037; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 3038; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3039; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3040; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3041; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3042; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3043; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3044; AVX1-NEXT: retq 3045; 3046; AVX2-LABEL: ugt_6_v8i16: 3047; AVX2: # %bb.0: 3048; AVX2-NEXT: 
vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3049; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 3050; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3051; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3052; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 3053; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 3054; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3055; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3056; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 3057; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3058; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 3059; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3060; AVX2-NEXT: retq 3061; 3062; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i16: 3063; AVX512VPOPCNTDQ: # %bb.0: 3064; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3065; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 3066; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 3067; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3068; AVX512VPOPCNTDQ-NEXT: vzeroupper 3069; AVX512VPOPCNTDQ-NEXT: retq 3070; 3071; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i16: 3072; AVX512VPOPCNTDQVL: # %bb.0: 3073; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3074; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 3075; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 3076; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3077; AVX512VPOPCNTDQVL-NEXT: vzeroupper 3078; AVX512VPOPCNTDQVL-NEXT: retq 3079; 3080; BITALG_NOVLX-LABEL: ugt_6_v8i16: 3081; BITALG_NOVLX: # %bb.0: 3082; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3083; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 3084; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3085; BITALG_NOVLX-NEXT: vzeroupper 3086; BITALG_NOVLX-NEXT: retq 3087; 3088; BITALG-LABEL: ugt_6_v8i16: 3089; BITALG: # %bb.0: 3090; BITALG-NEXT: vpopcntw %xmm0, %xmm0 3091; 
BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 3092; BITALG-NEXT: vpmovm2w %k0, %xmm0 3093; BITALG-NEXT: retq 3094 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 3095 %3 = icmp ugt <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6> 3096 %4 = sext <8 x i1> %3 to <8 x i16> 3097 ret <8 x i16> %4 3098} 3099 3100define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { 3101; SSE2-LABEL: ult_7_v8i16: 3102; SSE2: # %bb.0: 3103; SSE2-NEXT: movdqa %xmm0, %xmm1 3104; SSE2-NEXT: psrlw $1, %xmm1 3105; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3106; SSE2-NEXT: psubb %xmm1, %xmm0 3107; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3108; SSE2-NEXT: movdqa %xmm0, %xmm2 3109; SSE2-NEXT: pand %xmm1, %xmm2 3110; SSE2-NEXT: psrlw $2, %xmm0 3111; SSE2-NEXT: pand %xmm1, %xmm0 3112; SSE2-NEXT: paddb %xmm2, %xmm0 3113; SSE2-NEXT: movdqa %xmm0, %xmm1 3114; SSE2-NEXT: psrlw $4, %xmm1 3115; SSE2-NEXT: paddb %xmm0, %xmm1 3116; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3117; SSE2-NEXT: movdqa %xmm1, %xmm2 3118; SSE2-NEXT: psllw $8, %xmm2 3119; SSE2-NEXT: paddb %xmm1, %xmm2 3120; SSE2-NEXT: psrlw $8, %xmm2 3121; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] 3122; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 3123; SSE2-NEXT: retq 3124; 3125; SSE3-LABEL: ult_7_v8i16: 3126; SSE3: # %bb.0: 3127; SSE3-NEXT: movdqa %xmm0, %xmm1 3128; SSE3-NEXT: psrlw $1, %xmm1 3129; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3130; SSE3-NEXT: psubb %xmm1, %xmm0 3131; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3132; SSE3-NEXT: movdqa %xmm0, %xmm2 3133; SSE3-NEXT: pand %xmm1, %xmm2 3134; SSE3-NEXT: psrlw $2, %xmm0 3135; SSE3-NEXT: pand %xmm1, %xmm0 3136; SSE3-NEXT: paddb %xmm2, %xmm0 3137; SSE3-NEXT: movdqa %xmm0, %xmm1 3138; SSE3-NEXT: psrlw $4, %xmm1 3139; SSE3-NEXT: paddb %xmm0, %xmm1 3140; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3141; SSE3-NEXT: movdqa %xmm1, %xmm2 3142; SSE3-NEXT: psllw $8, %xmm2 3143; SSE3-NEXT: paddb %xmm1, %xmm2 3144; 
SSE3-NEXT: psrlw $8, %xmm2 3145; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] 3146; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 3147; SSE3-NEXT: retq 3148; 3149; SSSE3-LABEL: ult_7_v8i16: 3150; SSSE3: # %bb.0: 3151; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3152; SSSE3-NEXT: movdqa %xmm0, %xmm2 3153; SSSE3-NEXT: pand %xmm1, %xmm2 3154; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3155; SSSE3-NEXT: movdqa %xmm3, %xmm4 3156; SSSE3-NEXT: pshufb %xmm2, %xmm4 3157; SSSE3-NEXT: psrlw $4, %xmm0 3158; SSSE3-NEXT: pand %xmm1, %xmm0 3159; SSSE3-NEXT: pshufb %xmm0, %xmm3 3160; SSSE3-NEXT: paddb %xmm4, %xmm3 3161; SSSE3-NEXT: movdqa %xmm3, %xmm1 3162; SSSE3-NEXT: psllw $8, %xmm1 3163; SSSE3-NEXT: paddb %xmm3, %xmm1 3164; SSSE3-NEXT: psrlw $8, %xmm1 3165; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] 3166; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 3167; SSSE3-NEXT: retq 3168; 3169; SSE41-LABEL: ult_7_v8i16: 3170; SSE41: # %bb.0: 3171; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3172; SSE41-NEXT: movdqa %xmm0, %xmm2 3173; SSE41-NEXT: pand %xmm1, %xmm2 3174; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3175; SSE41-NEXT: movdqa %xmm3, %xmm4 3176; SSE41-NEXT: pshufb %xmm2, %xmm4 3177; SSE41-NEXT: psrlw $4, %xmm0 3178; SSE41-NEXT: pand %xmm1, %xmm0 3179; SSE41-NEXT: pshufb %xmm0, %xmm3 3180; SSE41-NEXT: paddb %xmm4, %xmm3 3181; SSE41-NEXT: movdqa %xmm3, %xmm1 3182; SSE41-NEXT: psllw $8, %xmm1 3183; SSE41-NEXT: paddb %xmm3, %xmm1 3184; SSE41-NEXT: psrlw $8, %xmm1 3185; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] 3186; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 3187; SSE41-NEXT: retq 3188; 3189; AVX1-LABEL: ult_7_v8i16: 3190; AVX1: # %bb.0: 3191; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3192; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 3193; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3194; AVX1-NEXT: 
vpshufb %xmm2, %xmm3, %xmm2 3195; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 3196; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 3197; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3198; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3199; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3200; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3201; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3202; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] 3203; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3204; AVX1-NEXT: retq 3205; 3206; AVX2-LABEL: ult_7_v8i16: 3207; AVX2: # %bb.0: 3208; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3209; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 3210; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3211; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3212; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 3213; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 3214; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3215; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3216; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 3217; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3218; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 3219; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] 3220; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3221; AVX2-NEXT: retq 3222; 3223; AVX512VPOPCNTDQ-LABEL: ult_7_v8i16: 3224; AVX512VPOPCNTDQ: # %bb.0: 3225; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3226; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 3227; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 3228; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] 3229; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3230; AVX512VPOPCNTDQ-NEXT: vzeroupper 3231; AVX512VPOPCNTDQ-NEXT: retq 3232; 3233; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i16: 3234; AVX512VPOPCNTDQVL: # %bb.0: 3235; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3236; 
AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 3237; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 3238; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] 3239; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3240; AVX512VPOPCNTDQVL-NEXT: vzeroupper 3241; AVX512VPOPCNTDQVL-NEXT: retq 3242; 3243; BITALG_NOVLX-LABEL: ult_7_v8i16: 3244; BITALG_NOVLX: # %bb.0: 3245; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3246; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 3247; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] 3248; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3249; BITALG_NOVLX-NEXT: vzeroupper 3250; BITALG_NOVLX-NEXT: retq 3251; 3252; BITALG-LABEL: ult_7_v8i16: 3253; BITALG: # %bb.0: 3254; BITALG-NEXT: vpopcntw %xmm0, %xmm0 3255; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 3256; BITALG-NEXT: vpmovm2w %k0, %xmm0 3257; BITALG-NEXT: retq 3258 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 3259 %3 = icmp ult <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 3260 %4 = sext <8 x i1> %3 to <8 x i16> 3261 ret <8 x i16> %4 3262} 3263 3264define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { 3265; SSE2-LABEL: ugt_7_v8i16: 3266; SSE2: # %bb.0: 3267; SSE2-NEXT: movdqa %xmm0, %xmm1 3268; SSE2-NEXT: psrlw $1, %xmm1 3269; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3270; SSE2-NEXT: psubb %xmm1, %xmm0 3271; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3272; SSE2-NEXT: movdqa %xmm0, %xmm2 3273; SSE2-NEXT: pand %xmm1, %xmm2 3274; SSE2-NEXT: psrlw $2, %xmm0 3275; SSE2-NEXT: pand %xmm1, %xmm0 3276; SSE2-NEXT: paddb %xmm2, %xmm0 3277; SSE2-NEXT: movdqa %xmm0, %xmm1 3278; SSE2-NEXT: psrlw $4, %xmm1 3279; SSE2-NEXT: paddb %xmm0, %xmm1 3280; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3281; SSE2-NEXT: movdqa %xmm1, %xmm0 3282; SSE2-NEXT: psllw $8, %xmm0 3283; SSE2-NEXT: paddb %xmm1, %xmm0 3284; SSE2-NEXT: psrlw $8, %xmm0 3285; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3286; SSE2-NEXT: retq 3287; 
3288; SSE3-LABEL: ugt_7_v8i16: 3289; SSE3: # %bb.0: 3290; SSE3-NEXT: movdqa %xmm0, %xmm1 3291; SSE3-NEXT: psrlw $1, %xmm1 3292; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3293; SSE3-NEXT: psubb %xmm1, %xmm0 3294; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3295; SSE3-NEXT: movdqa %xmm0, %xmm2 3296; SSE3-NEXT: pand %xmm1, %xmm2 3297; SSE3-NEXT: psrlw $2, %xmm0 3298; SSE3-NEXT: pand %xmm1, %xmm0 3299; SSE3-NEXT: paddb %xmm2, %xmm0 3300; SSE3-NEXT: movdqa %xmm0, %xmm1 3301; SSE3-NEXT: psrlw $4, %xmm1 3302; SSE3-NEXT: paddb %xmm0, %xmm1 3303; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3304; SSE3-NEXT: movdqa %xmm1, %xmm0 3305; SSE3-NEXT: psllw $8, %xmm0 3306; SSE3-NEXT: paddb %xmm1, %xmm0 3307; SSE3-NEXT: psrlw $8, %xmm0 3308; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3309; SSE3-NEXT: retq 3310; 3311; SSSE3-LABEL: ugt_7_v8i16: 3312; SSSE3: # %bb.0: 3313; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3314; SSSE3-NEXT: movdqa %xmm0, %xmm2 3315; SSSE3-NEXT: pand %xmm1, %xmm2 3316; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3317; SSSE3-NEXT: movdqa %xmm3, %xmm4 3318; SSSE3-NEXT: pshufb %xmm2, %xmm4 3319; SSSE3-NEXT: psrlw $4, %xmm0 3320; SSSE3-NEXT: pand %xmm1, %xmm0 3321; SSSE3-NEXT: pshufb %xmm0, %xmm3 3322; SSSE3-NEXT: paddb %xmm4, %xmm3 3323; SSSE3-NEXT: movdqa %xmm3, %xmm0 3324; SSSE3-NEXT: psllw $8, %xmm0 3325; SSSE3-NEXT: paddb %xmm3, %xmm0 3326; SSSE3-NEXT: psrlw $8, %xmm0 3327; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3328; SSSE3-NEXT: retq 3329; 3330; SSE41-LABEL: ugt_7_v8i16: 3331; SSE41: # %bb.0: 3332; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3333; SSE41-NEXT: movdqa %xmm0, %xmm2 3334; SSE41-NEXT: pand %xmm1, %xmm2 3335; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3336; SSE41-NEXT: movdqa %xmm3, %xmm4 3337; SSE41-NEXT: pshufb %xmm2, %xmm4 3338; SSE41-NEXT: psrlw $4, %xmm0 3339; SSE41-NEXT: pand %xmm1, %xmm0 
3340; SSE41-NEXT: pshufb %xmm0, %xmm3 3341; SSE41-NEXT: paddb %xmm4, %xmm3 3342; SSE41-NEXT: movdqa %xmm3, %xmm0 3343; SSE41-NEXT: psllw $8, %xmm0 3344; SSE41-NEXT: paddb %xmm3, %xmm0 3345; SSE41-NEXT: psrlw $8, %xmm0 3346; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3347; SSE41-NEXT: retq 3348; 3349; AVX1-LABEL: ugt_7_v8i16: 3350; AVX1: # %bb.0: 3351; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3352; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 3353; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3354; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3355; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 3356; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 3357; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3358; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3359; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3360; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3361; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3362; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3363; AVX1-NEXT: retq 3364; 3365; AVX2-LABEL: ugt_7_v8i16: 3366; AVX2: # %bb.0: 3367; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3368; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 3369; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3370; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3371; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 3372; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 3373; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3374; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3375; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 3376; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3377; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 3378; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3379; AVX2-NEXT: retq 3380; 3381; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i16: 3382; AVX512VPOPCNTDQ: # %bb.0: 3383; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3384; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 3385; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 
3386; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3387; AVX512VPOPCNTDQ-NEXT: vzeroupper 3388; AVX512VPOPCNTDQ-NEXT: retq 3389; 3390; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i16: 3391; AVX512VPOPCNTDQVL: # %bb.0: 3392; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3393; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 3394; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 3395; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3396; AVX512VPOPCNTDQVL-NEXT: vzeroupper 3397; AVX512VPOPCNTDQVL-NEXT: retq 3398; 3399; BITALG_NOVLX-LABEL: ugt_7_v8i16: 3400; BITALG_NOVLX: # %bb.0: 3401; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3402; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 3403; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3404; BITALG_NOVLX-NEXT: vzeroupper 3405; BITALG_NOVLX-NEXT: retq 3406; 3407; BITALG-LABEL: ugt_7_v8i16: 3408; BITALG: # %bb.0: 3409; BITALG-NEXT: vpopcntw %xmm0, %xmm0 3410; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 3411; BITALG-NEXT: vpmovm2w %k0, %xmm0 3412; BITALG-NEXT: retq 3413 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 3414 %3 = icmp ugt <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 3415 %4 = sext <8 x i1> %3 to <8 x i16> 3416 ret <8 x i16> %4 3417} 3418 3419define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { 3420; SSE2-LABEL: ult_8_v8i16: 3421; SSE2: # %bb.0: 3422; SSE2-NEXT: movdqa %xmm0, %xmm1 3423; SSE2-NEXT: psrlw $1, %xmm1 3424; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3425; SSE2-NEXT: psubb %xmm1, %xmm0 3426; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3427; SSE2-NEXT: movdqa %xmm0, %xmm2 3428; SSE2-NEXT: pand %xmm1, %xmm2 3429; SSE2-NEXT: psrlw $2, %xmm0 3430; SSE2-NEXT: pand %xmm1, %xmm0 3431; SSE2-NEXT: paddb %xmm2, %xmm0 3432; SSE2-NEXT: movdqa %xmm0, %xmm1 3433; SSE2-NEXT: psrlw $4, %xmm1 3434; SSE2-NEXT: paddb 
%xmm0, %xmm1 3435; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3436; SSE2-NEXT: movdqa %xmm1, %xmm2 3437; SSE2-NEXT: psllw $8, %xmm2 3438; SSE2-NEXT: paddb %xmm1, %xmm2 3439; SSE2-NEXT: psrlw $8, %xmm2 3440; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] 3441; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 3442; SSE2-NEXT: retq 3443; 3444; SSE3-LABEL: ult_8_v8i16: 3445; SSE3: # %bb.0: 3446; SSE3-NEXT: movdqa %xmm0, %xmm1 3447; SSE3-NEXT: psrlw $1, %xmm1 3448; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3449; SSE3-NEXT: psubb %xmm1, %xmm0 3450; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3451; SSE3-NEXT: movdqa %xmm0, %xmm2 3452; SSE3-NEXT: pand %xmm1, %xmm2 3453; SSE3-NEXT: psrlw $2, %xmm0 3454; SSE3-NEXT: pand %xmm1, %xmm0 3455; SSE3-NEXT: paddb %xmm2, %xmm0 3456; SSE3-NEXT: movdqa %xmm0, %xmm1 3457; SSE3-NEXT: psrlw $4, %xmm1 3458; SSE3-NEXT: paddb %xmm0, %xmm1 3459; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3460; SSE3-NEXT: movdqa %xmm1, %xmm2 3461; SSE3-NEXT: psllw $8, %xmm2 3462; SSE3-NEXT: paddb %xmm1, %xmm2 3463; SSE3-NEXT: psrlw $8, %xmm2 3464; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] 3465; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 3466; SSE3-NEXT: retq 3467; 3468; SSSE3-LABEL: ult_8_v8i16: 3469; SSSE3: # %bb.0: 3470; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3471; SSSE3-NEXT: movdqa %xmm0, %xmm2 3472; SSSE3-NEXT: pand %xmm1, %xmm2 3473; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3474; SSSE3-NEXT: movdqa %xmm3, %xmm4 3475; SSSE3-NEXT: pshufb %xmm2, %xmm4 3476; SSSE3-NEXT: psrlw $4, %xmm0 3477; SSSE3-NEXT: pand %xmm1, %xmm0 3478; SSSE3-NEXT: pshufb %xmm0, %xmm3 3479; SSSE3-NEXT: paddb %xmm4, %xmm3 3480; SSSE3-NEXT: movdqa %xmm3, %xmm1 3481; SSSE3-NEXT: psllw $8, %xmm1 3482; SSSE3-NEXT: paddb %xmm3, %xmm1 3483; SSSE3-NEXT: psrlw $8, %xmm1 3484; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] 3485; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 3486; SSSE3-NEXT: retq 3487; 3488; SSE41-LABEL: 
ult_8_v8i16: 3489; SSE41: # %bb.0: 3490; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3491; SSE41-NEXT: movdqa %xmm0, %xmm2 3492; SSE41-NEXT: pand %xmm1, %xmm2 3493; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3494; SSE41-NEXT: movdqa %xmm3, %xmm4 3495; SSE41-NEXT: pshufb %xmm2, %xmm4 3496; SSE41-NEXT: psrlw $4, %xmm0 3497; SSE41-NEXT: pand %xmm1, %xmm0 3498; SSE41-NEXT: pshufb %xmm0, %xmm3 3499; SSE41-NEXT: paddb %xmm4, %xmm3 3500; SSE41-NEXT: movdqa %xmm3, %xmm1 3501; SSE41-NEXT: psllw $8, %xmm1 3502; SSE41-NEXT: paddb %xmm3, %xmm1 3503; SSE41-NEXT: psrlw $8, %xmm1 3504; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] 3505; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 3506; SSE41-NEXT: retq 3507; 3508; AVX1-LABEL: ult_8_v8i16: 3509; AVX1: # %bb.0: 3510; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3511; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 3512; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3513; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3514; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 3515; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 3516; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3517; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3518; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3519; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3520; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3521; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] 3522; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3523; AVX1-NEXT: retq 3524; 3525; AVX2-LABEL: ult_8_v8i16: 3526; AVX2: # %bb.0: 3527; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3528; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 3529; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3530; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3531; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 3532; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 3533; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3534; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3535; AVX2-NEXT: 
vpsllw $8, %xmm0, %xmm1 3536; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3537; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 3538; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] 3539; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3540; AVX2-NEXT: retq 3541; 3542; AVX512VPOPCNTDQ-LABEL: ult_8_v8i16: 3543; AVX512VPOPCNTDQ: # %bb.0: 3544; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3545; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 3546; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 3547; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] 3548; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3549; AVX512VPOPCNTDQ-NEXT: vzeroupper 3550; AVX512VPOPCNTDQ-NEXT: retq 3551; 3552; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i16: 3553; AVX512VPOPCNTDQVL: # %bb.0: 3554; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3555; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 3556; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 3557; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] 3558; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3559; AVX512VPOPCNTDQVL-NEXT: vzeroupper 3560; AVX512VPOPCNTDQVL-NEXT: retq 3561; 3562; BITALG_NOVLX-LABEL: ult_8_v8i16: 3563; BITALG_NOVLX: # %bb.0: 3564; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3565; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 3566; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] 3567; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3568; BITALG_NOVLX-NEXT: vzeroupper 3569; BITALG_NOVLX-NEXT: retq 3570; 3571; BITALG-LABEL: ult_8_v8i16: 3572; BITALG: # %bb.0: 3573; BITALG-NEXT: vpopcntw %xmm0, %xmm0 3574; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 3575; BITALG-NEXT: vpmovm2w %k0, %xmm0 3576; BITALG-NEXT: retq 3577 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 3578 %3 = icmp 
ult <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 3579 %4 = sext <8 x i1> %3 to <8 x i16> 3580 ret <8 x i16> %4 3581} 3582 3583define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { 3584; SSE2-LABEL: ugt_8_v8i16: 3585; SSE2: # %bb.0: 3586; SSE2-NEXT: movdqa %xmm0, %xmm1 3587; SSE2-NEXT: psrlw $1, %xmm1 3588; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3589; SSE2-NEXT: psubb %xmm1, %xmm0 3590; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3591; SSE2-NEXT: movdqa %xmm0, %xmm2 3592; SSE2-NEXT: pand %xmm1, %xmm2 3593; SSE2-NEXT: psrlw $2, %xmm0 3594; SSE2-NEXT: pand %xmm1, %xmm0 3595; SSE2-NEXT: paddb %xmm2, %xmm0 3596; SSE2-NEXT: movdqa %xmm0, %xmm1 3597; SSE2-NEXT: psrlw $4, %xmm1 3598; SSE2-NEXT: paddb %xmm0, %xmm1 3599; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3600; SSE2-NEXT: movdqa %xmm1, %xmm0 3601; SSE2-NEXT: psllw $8, %xmm0 3602; SSE2-NEXT: paddb %xmm1, %xmm0 3603; SSE2-NEXT: psrlw $8, %xmm0 3604; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3605; SSE2-NEXT: retq 3606; 3607; SSE3-LABEL: ugt_8_v8i16: 3608; SSE3: # %bb.0: 3609; SSE3-NEXT: movdqa %xmm0, %xmm1 3610; SSE3-NEXT: psrlw $1, %xmm1 3611; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3612; SSE3-NEXT: psubb %xmm1, %xmm0 3613; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3614; SSE3-NEXT: movdqa %xmm0, %xmm2 3615; SSE3-NEXT: pand %xmm1, %xmm2 3616; SSE3-NEXT: psrlw $2, %xmm0 3617; SSE3-NEXT: pand %xmm1, %xmm0 3618; SSE3-NEXT: paddb %xmm2, %xmm0 3619; SSE3-NEXT: movdqa %xmm0, %xmm1 3620; SSE3-NEXT: psrlw $4, %xmm1 3621; SSE3-NEXT: paddb %xmm0, %xmm1 3622; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3623; SSE3-NEXT: movdqa %xmm1, %xmm0 3624; SSE3-NEXT: psllw $8, %xmm0 3625; SSE3-NEXT: paddb %xmm1, %xmm0 3626; SSE3-NEXT: psrlw $8, %xmm0 3627; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3628; SSE3-NEXT: retq 3629; 3630; SSSE3-LABEL: ugt_8_v8i16: 3631; SSSE3: # %bb.0: 3632; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 
3633; SSSE3-NEXT: movdqa %xmm0, %xmm2 3634; SSSE3-NEXT: pand %xmm1, %xmm2 3635; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3636; SSSE3-NEXT: movdqa %xmm3, %xmm4 3637; SSSE3-NEXT: pshufb %xmm2, %xmm4 3638; SSSE3-NEXT: psrlw $4, %xmm0 3639; SSSE3-NEXT: pand %xmm1, %xmm0 3640; SSSE3-NEXT: pshufb %xmm0, %xmm3 3641; SSSE3-NEXT: paddb %xmm4, %xmm3 3642; SSSE3-NEXT: movdqa %xmm3, %xmm0 3643; SSSE3-NEXT: psllw $8, %xmm0 3644; SSSE3-NEXT: paddb %xmm3, %xmm0 3645; SSSE3-NEXT: psrlw $8, %xmm0 3646; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3647; SSSE3-NEXT: retq 3648; 3649; SSE41-LABEL: ugt_8_v8i16: 3650; SSE41: # %bb.0: 3651; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3652; SSE41-NEXT: movdqa %xmm0, %xmm2 3653; SSE41-NEXT: pand %xmm1, %xmm2 3654; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3655; SSE41-NEXT: movdqa %xmm3, %xmm4 3656; SSE41-NEXT: pshufb %xmm2, %xmm4 3657; SSE41-NEXT: psrlw $4, %xmm0 3658; SSE41-NEXT: pand %xmm1, %xmm0 3659; SSE41-NEXT: pshufb %xmm0, %xmm3 3660; SSE41-NEXT: paddb %xmm4, %xmm3 3661; SSE41-NEXT: movdqa %xmm3, %xmm0 3662; SSE41-NEXT: psllw $8, %xmm0 3663; SSE41-NEXT: paddb %xmm3, %xmm0 3664; SSE41-NEXT: psrlw $8, %xmm0 3665; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3666; SSE41-NEXT: retq 3667; 3668; AVX1-LABEL: ugt_8_v8i16: 3669; AVX1: # %bb.0: 3670; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3671; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 3672; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3673; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3674; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 3675; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 3676; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3677; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3678; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3679; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3680; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3681; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3682; AVX1-NEXT: 
retq 3683; 3684; AVX2-LABEL: ugt_8_v8i16: 3685; AVX2: # %bb.0: 3686; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3687; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 3688; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3689; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3690; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 3691; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 3692; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3693; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3694; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 3695; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3696; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 3697; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3698; AVX2-NEXT: retq 3699; 3700; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i16: 3701; AVX512VPOPCNTDQ: # %bb.0: 3702; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3703; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 3704; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 3705; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3706; AVX512VPOPCNTDQ-NEXT: vzeroupper 3707; AVX512VPOPCNTDQ-NEXT: retq 3708; 3709; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i16: 3710; AVX512VPOPCNTDQVL: # %bb.0: 3711; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3712; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 3713; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 3714; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3715; AVX512VPOPCNTDQVL-NEXT: vzeroupper 3716; AVX512VPOPCNTDQVL-NEXT: retq 3717; 3718; BITALG_NOVLX-LABEL: ugt_8_v8i16: 3719; BITALG_NOVLX: # %bb.0: 3720; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3721; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 3722; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 3723; BITALG_NOVLX-NEXT: vzeroupper 3724; BITALG_NOVLX-NEXT: retq 3725; 3726; BITALG-LABEL: 
ugt_8_v8i16: 3727; BITALG: # %bb.0: 3728; BITALG-NEXT: vpopcntw %xmm0, %xmm0 3729; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 3730; BITALG-NEXT: vpmovm2w %k0, %xmm0 3731; BITALG-NEXT: retq 3732 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 3733 %3 = icmp ugt <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 3734 %4 = sext <8 x i1> %3 to <8 x i16> 3735 ret <8 x i16> %4 3736} 3737 3738define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { 3739; SSE2-LABEL: ult_9_v8i16: 3740; SSE2: # %bb.0: 3741; SSE2-NEXT: movdqa %xmm0, %xmm1 3742; SSE2-NEXT: psrlw $1, %xmm1 3743; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3744; SSE2-NEXT: psubb %xmm1, %xmm0 3745; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3746; SSE2-NEXT: movdqa %xmm0, %xmm2 3747; SSE2-NEXT: pand %xmm1, %xmm2 3748; SSE2-NEXT: psrlw $2, %xmm0 3749; SSE2-NEXT: pand %xmm1, %xmm0 3750; SSE2-NEXT: paddb %xmm2, %xmm0 3751; SSE2-NEXT: movdqa %xmm0, %xmm1 3752; SSE2-NEXT: psrlw $4, %xmm1 3753; SSE2-NEXT: paddb %xmm0, %xmm1 3754; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3755; SSE2-NEXT: movdqa %xmm1, %xmm2 3756; SSE2-NEXT: psllw $8, %xmm2 3757; SSE2-NEXT: paddb %xmm1, %xmm2 3758; SSE2-NEXT: psrlw $8, %xmm2 3759; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] 3760; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 3761; SSE2-NEXT: retq 3762; 3763; SSE3-LABEL: ult_9_v8i16: 3764; SSE3: # %bb.0: 3765; SSE3-NEXT: movdqa %xmm0, %xmm1 3766; SSE3-NEXT: psrlw $1, %xmm1 3767; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3768; SSE3-NEXT: psubb %xmm1, %xmm0 3769; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3770; SSE3-NEXT: movdqa %xmm0, %xmm2 3771; SSE3-NEXT: pand %xmm1, %xmm2 3772; SSE3-NEXT: psrlw $2, %xmm0 3773; SSE3-NEXT: pand %xmm1, %xmm0 3774; SSE3-NEXT: paddb %xmm2, %xmm0 3775; SSE3-NEXT: movdqa %xmm0, %xmm1 3776; SSE3-NEXT: psrlw $4, %xmm1 3777; SSE3-NEXT: paddb %xmm0, %xmm1 3778; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3779; SSE3-NEXT: movdqa %xmm1, %xmm2 
3780; SSE3-NEXT: psllw $8, %xmm2 3781; SSE3-NEXT: paddb %xmm1, %xmm2 3782; SSE3-NEXT: psrlw $8, %xmm2 3783; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] 3784; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 3785; SSE3-NEXT: retq 3786; 3787; SSSE3-LABEL: ult_9_v8i16: 3788; SSSE3: # %bb.0: 3789; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3790; SSSE3-NEXT: movdqa %xmm0, %xmm2 3791; SSSE3-NEXT: pand %xmm1, %xmm2 3792; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3793; SSSE3-NEXT: movdqa %xmm3, %xmm4 3794; SSSE3-NEXT: pshufb %xmm2, %xmm4 3795; SSSE3-NEXT: psrlw $4, %xmm0 3796; SSSE3-NEXT: pand %xmm1, %xmm0 3797; SSSE3-NEXT: pshufb %xmm0, %xmm3 3798; SSSE3-NEXT: paddb %xmm4, %xmm3 3799; SSSE3-NEXT: movdqa %xmm3, %xmm1 3800; SSSE3-NEXT: psllw $8, %xmm1 3801; SSSE3-NEXT: paddb %xmm3, %xmm1 3802; SSSE3-NEXT: psrlw $8, %xmm1 3803; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] 3804; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 3805; SSSE3-NEXT: retq 3806; 3807; SSE41-LABEL: ult_9_v8i16: 3808; SSE41: # %bb.0: 3809; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3810; SSE41-NEXT: movdqa %xmm0, %xmm2 3811; SSE41-NEXT: pand %xmm1, %xmm2 3812; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3813; SSE41-NEXT: movdqa %xmm3, %xmm4 3814; SSE41-NEXT: pshufb %xmm2, %xmm4 3815; SSE41-NEXT: psrlw $4, %xmm0 3816; SSE41-NEXT: pand %xmm1, %xmm0 3817; SSE41-NEXT: pshufb %xmm0, %xmm3 3818; SSE41-NEXT: paddb %xmm4, %xmm3 3819; SSE41-NEXT: movdqa %xmm3, %xmm1 3820; SSE41-NEXT: psllw $8, %xmm1 3821; SSE41-NEXT: paddb %xmm3, %xmm1 3822; SSE41-NEXT: psrlw $8, %xmm1 3823; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] 3824; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 3825; SSE41-NEXT: retq 3826; 3827; AVX1-LABEL: ult_9_v8i16: 3828; AVX1: # %bb.0: 3829; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3830; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 3831; AVX1-NEXT: 
vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3832; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3833; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 3834; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 3835; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3836; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3837; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3838; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3839; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3840; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] 3841; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3842; AVX1-NEXT: retq 3843; 3844; AVX2-LABEL: ult_9_v8i16: 3845; AVX2: # %bb.0: 3846; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3847; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 3848; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3849; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3850; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 3851; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 3852; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3853; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3854; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 3855; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3856; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 3857; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] 3858; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3859; AVX2-NEXT: retq 3860; 3861; AVX512VPOPCNTDQ-LABEL: ult_9_v8i16: 3862; AVX512VPOPCNTDQ: # %bb.0: 3863; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3864; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 3865; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 3866; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] 3867; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3868; AVX512VPOPCNTDQ-NEXT: vzeroupper 3869; AVX512VPOPCNTDQ-NEXT: retq 3870; 3871; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i16: 3872; AVX512VPOPCNTDQVL: # %bb.0: 3873; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3874; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 3875; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 3876; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] 3877; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3878; AVX512VPOPCNTDQVL-NEXT: vzeroupper 3879; AVX512VPOPCNTDQVL-NEXT: retq 3880; 3881; BITALG_NOVLX-LABEL: ult_9_v8i16: 3882; BITALG_NOVLX: # %bb.0: 3883; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3884; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 3885; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] 3886; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 3887; BITALG_NOVLX-NEXT: vzeroupper 3888; BITALG_NOVLX-NEXT: retq 3889; 3890; BITALG-LABEL: ult_9_v8i16: 3891; BITALG: # %bb.0: 3892; BITALG-NEXT: vpopcntw %xmm0, %xmm0 3893; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 3894; BITALG-NEXT: vpmovm2w %k0, %xmm0 3895; BITALG-NEXT: retq 3896 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 3897 %3 = icmp ult <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9> 3898 %4 = sext <8 x i1> %3 to <8 x i16> 3899 ret <8 x i16> %4 3900} 3901 3902define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { 3903; SSE2-LABEL: ugt_9_v8i16: 3904; SSE2: # %bb.0: 3905; SSE2-NEXT: movdqa %xmm0, %xmm1 3906; SSE2-NEXT: psrlw $1, %xmm1 3907; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3908; SSE2-NEXT: psubb %xmm1, %xmm0 3909; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3910; SSE2-NEXT: movdqa %xmm0, %xmm2 3911; SSE2-NEXT: pand %xmm1, %xmm2 3912; SSE2-NEXT: psrlw $2, %xmm0 3913; SSE2-NEXT: pand %xmm1, %xmm0 3914; SSE2-NEXT: paddb %xmm2, %xmm0 3915; SSE2-NEXT: movdqa %xmm0, %xmm1 3916; SSE2-NEXT: psrlw $4, %xmm1 3917; SSE2-NEXT: paddb %xmm0, %xmm1 3918; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 3919; SSE2-NEXT: movdqa %xmm1, %xmm0 3920; SSE2-NEXT: psllw $8, %xmm0 3921; SSE2-NEXT: paddb %xmm1, 
%xmm0 3922; SSE2-NEXT: psrlw $8, %xmm0 3923; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3924; SSE2-NEXT: retq 3925; 3926; SSE3-LABEL: ugt_9_v8i16: 3927; SSE3: # %bb.0: 3928; SSE3-NEXT: movdqa %xmm0, %xmm1 3929; SSE3-NEXT: psrlw $1, %xmm1 3930; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3931; SSE3-NEXT: psubb %xmm1, %xmm0 3932; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 3933; SSE3-NEXT: movdqa %xmm0, %xmm2 3934; SSE3-NEXT: pand %xmm1, %xmm2 3935; SSE3-NEXT: psrlw $2, %xmm0 3936; SSE3-NEXT: pand %xmm1, %xmm0 3937; SSE3-NEXT: paddb %xmm2, %xmm0 3938; SSE3-NEXT: movdqa %xmm0, %xmm1 3939; SSE3-NEXT: psrlw $4, %xmm1 3940; SSE3-NEXT: paddb %xmm0, %xmm1 3941; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 3942; SSE3-NEXT: movdqa %xmm1, %xmm0 3943; SSE3-NEXT: psllw $8, %xmm0 3944; SSE3-NEXT: paddb %xmm1, %xmm0 3945; SSE3-NEXT: psrlw $8, %xmm0 3946; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3947; SSE3-NEXT: retq 3948; 3949; SSSE3-LABEL: ugt_9_v8i16: 3950; SSSE3: # %bb.0: 3951; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3952; SSSE3-NEXT: movdqa %xmm0, %xmm2 3953; SSSE3-NEXT: pand %xmm1, %xmm2 3954; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3955; SSSE3-NEXT: movdqa %xmm3, %xmm4 3956; SSSE3-NEXT: pshufb %xmm2, %xmm4 3957; SSSE3-NEXT: psrlw $4, %xmm0 3958; SSSE3-NEXT: pand %xmm1, %xmm0 3959; SSSE3-NEXT: pshufb %xmm0, %xmm3 3960; SSSE3-NEXT: paddb %xmm4, %xmm3 3961; SSSE3-NEXT: movdqa %xmm3, %xmm0 3962; SSSE3-NEXT: psllw $8, %xmm0 3963; SSSE3-NEXT: paddb %xmm3, %xmm0 3964; SSSE3-NEXT: psrlw $8, %xmm0 3965; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3966; SSSE3-NEXT: retq 3967; 3968; SSE41-LABEL: ugt_9_v8i16: 3969; SSE41: # %bb.0: 3970; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3971; SSE41-NEXT: movdqa %xmm0, %xmm2 3972; SSE41-NEXT: pand %xmm1, %xmm2 3973; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3974; SSE41-NEXT: movdqa %xmm3, 
%xmm4 3975; SSE41-NEXT: pshufb %xmm2, %xmm4 3976; SSE41-NEXT: psrlw $4, %xmm0 3977; SSE41-NEXT: pand %xmm1, %xmm0 3978; SSE41-NEXT: pshufb %xmm0, %xmm3 3979; SSE41-NEXT: paddb %xmm4, %xmm3 3980; SSE41-NEXT: movdqa %xmm3, %xmm0 3981; SSE41-NEXT: psllw $8, %xmm0 3982; SSE41-NEXT: paddb %xmm3, %xmm0 3983; SSE41-NEXT: psrlw $8, %xmm0 3984; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 3985; SSE41-NEXT: retq 3986; 3987; AVX1-LABEL: ugt_9_v8i16: 3988; AVX1: # %bb.0: 3989; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 3990; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 3991; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 3992; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 3993; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 3994; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 3995; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 3996; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3997; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3998; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 3999; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4000; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4001; AVX1-NEXT: retq 4002; 4003; AVX2-LABEL: ugt_9_v8i16: 4004; AVX2: # %bb.0: 4005; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4006; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 4007; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4008; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4009; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 4010; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 4011; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4012; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4013; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 4014; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4015; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 4016; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4017; AVX2-NEXT: retq 4018; 4019; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i16: 4020; AVX512VPOPCNTDQ: # %bb.0: 4021; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4022; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 4023; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 4024; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4025; AVX512VPOPCNTDQ-NEXT: vzeroupper 4026; AVX512VPOPCNTDQ-NEXT: retq 4027; 4028; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i16: 4029; AVX512VPOPCNTDQVL: # %bb.0: 4030; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4031; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 4032; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 4033; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4034; AVX512VPOPCNTDQVL-NEXT: vzeroupper 4035; AVX512VPOPCNTDQVL-NEXT: retq 4036; 4037; BITALG_NOVLX-LABEL: ugt_9_v8i16: 4038; BITALG_NOVLX: # %bb.0: 4039; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4040; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 4041; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4042; BITALG_NOVLX-NEXT: vzeroupper 4043; BITALG_NOVLX-NEXT: retq 4044; 4045; BITALG-LABEL: ugt_9_v8i16: 4046; BITALG: # %bb.0: 4047; BITALG-NEXT: vpopcntw %xmm0, %xmm0 4048; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 4049; BITALG-NEXT: vpmovm2w %k0, %xmm0 4050; BITALG-NEXT: retq 4051 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 4052 %3 = icmp ugt <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9> 4053 %4 = sext <8 x i1> %3 to <8 x i16> 4054 ret <8 x i16> %4 4055} 4056 4057define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { 4058; SSE2-LABEL: ult_10_v8i16: 4059; SSE2: # %bb.0: 4060; SSE2-NEXT: movdqa %xmm0, %xmm1 4061; SSE2-NEXT: psrlw $1, %xmm1 4062; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4063; SSE2-NEXT: psubb %xmm1, %xmm0 4064; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4065; SSE2-NEXT: movdqa %xmm0, %xmm2 4066; SSE2-NEXT: pand %xmm1, 
%xmm2 4067; SSE2-NEXT: psrlw $2, %xmm0 4068; SSE2-NEXT: pand %xmm1, %xmm0 4069; SSE2-NEXT: paddb %xmm2, %xmm0 4070; SSE2-NEXT: movdqa %xmm0, %xmm1 4071; SSE2-NEXT: psrlw $4, %xmm1 4072; SSE2-NEXT: paddb %xmm0, %xmm1 4073; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4074; SSE2-NEXT: movdqa %xmm1, %xmm2 4075; SSE2-NEXT: psllw $8, %xmm2 4076; SSE2-NEXT: paddb %xmm1, %xmm2 4077; SSE2-NEXT: psrlw $8, %xmm2 4078; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] 4079; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 4080; SSE2-NEXT: retq 4081; 4082; SSE3-LABEL: ult_10_v8i16: 4083; SSE3: # %bb.0: 4084; SSE3-NEXT: movdqa %xmm0, %xmm1 4085; SSE3-NEXT: psrlw $1, %xmm1 4086; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4087; SSE3-NEXT: psubb %xmm1, %xmm0 4088; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4089; SSE3-NEXT: movdqa %xmm0, %xmm2 4090; SSE3-NEXT: pand %xmm1, %xmm2 4091; SSE3-NEXT: psrlw $2, %xmm0 4092; SSE3-NEXT: pand %xmm1, %xmm0 4093; SSE3-NEXT: paddb %xmm2, %xmm0 4094; SSE3-NEXT: movdqa %xmm0, %xmm1 4095; SSE3-NEXT: psrlw $4, %xmm1 4096; SSE3-NEXT: paddb %xmm0, %xmm1 4097; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4098; SSE3-NEXT: movdqa %xmm1, %xmm2 4099; SSE3-NEXT: psllw $8, %xmm2 4100; SSE3-NEXT: paddb %xmm1, %xmm2 4101; SSE3-NEXT: psrlw $8, %xmm2 4102; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] 4103; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 4104; SSE3-NEXT: retq 4105; 4106; SSSE3-LABEL: ult_10_v8i16: 4107; SSSE3: # %bb.0: 4108; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4109; SSSE3-NEXT: movdqa %xmm0, %xmm2 4110; SSSE3-NEXT: pand %xmm1, %xmm2 4111; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4112; SSSE3-NEXT: movdqa %xmm3, %xmm4 4113; SSSE3-NEXT: pshufb %xmm2, %xmm4 4114; SSSE3-NEXT: psrlw $4, %xmm0 4115; SSSE3-NEXT: pand %xmm1, %xmm0 4116; SSSE3-NEXT: pshufb %xmm0, %xmm3 4117; SSSE3-NEXT: paddb %xmm4, %xmm3 4118; SSSE3-NEXT: movdqa %xmm3, %xmm1 4119; SSSE3-NEXT: psllw $8, 
%xmm1 4120; SSSE3-NEXT: paddb %xmm3, %xmm1 4121; SSSE3-NEXT: psrlw $8, %xmm1 4122; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] 4123; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 4124; SSSE3-NEXT: retq 4125; 4126; SSE41-LABEL: ult_10_v8i16: 4127; SSE41: # %bb.0: 4128; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4129; SSE41-NEXT: movdqa %xmm0, %xmm2 4130; SSE41-NEXT: pand %xmm1, %xmm2 4131; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4132; SSE41-NEXT: movdqa %xmm3, %xmm4 4133; SSE41-NEXT: pshufb %xmm2, %xmm4 4134; SSE41-NEXT: psrlw $4, %xmm0 4135; SSE41-NEXT: pand %xmm1, %xmm0 4136; SSE41-NEXT: pshufb %xmm0, %xmm3 4137; SSE41-NEXT: paddb %xmm4, %xmm3 4138; SSE41-NEXT: movdqa %xmm3, %xmm1 4139; SSE41-NEXT: psllw $8, %xmm1 4140; SSE41-NEXT: paddb %xmm3, %xmm1 4141; SSE41-NEXT: psrlw $8, %xmm1 4142; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] 4143; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 4144; SSE41-NEXT: retq 4145; 4146; AVX1-LABEL: ult_10_v8i16: 4147; AVX1: # %bb.0: 4148; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4149; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 4150; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4151; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4152; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 4153; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 4154; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4155; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4156; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 4157; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4158; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4159; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] 4160; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4161; AVX1-NEXT: retq 4162; 4163; AVX2-LABEL: ult_10_v8i16: 4164; AVX2: # %bb.0: 4165; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4166; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 4167; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4168; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4169; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 4170; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 4171; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4172; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4173; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 4174; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4175; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 4176; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] 4177; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4178; AVX2-NEXT: retq 4179; 4180; AVX512VPOPCNTDQ-LABEL: ult_10_v8i16: 4181; AVX512VPOPCNTDQ: # %bb.0: 4182; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4183; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 4184; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 4185; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] 4186; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4187; AVX512VPOPCNTDQ-NEXT: vzeroupper 4188; AVX512VPOPCNTDQ-NEXT: retq 4189; 4190; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i16: 4191; AVX512VPOPCNTDQVL: # %bb.0: 4192; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4193; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 4194; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 4195; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] 4196; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4197; AVX512VPOPCNTDQVL-NEXT: vzeroupper 4198; AVX512VPOPCNTDQVL-NEXT: retq 4199; 4200; BITALG_NOVLX-LABEL: ult_10_v8i16: 4201; BITALG_NOVLX: # %bb.0: 4202; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4203; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 4204; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] 4205; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4206; BITALG_NOVLX-NEXT: vzeroupper 4207; BITALG_NOVLX-NEXT: 
retq 4208; 4209; BITALG-LABEL: ult_10_v8i16: 4210; BITALG: # %bb.0: 4211; BITALG-NEXT: vpopcntw %xmm0, %xmm0 4212; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 4213; BITALG-NEXT: vpmovm2w %k0, %xmm0 4214; BITALG-NEXT: retq 4215 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 4216 %3 = icmp ult <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> 4217 %4 = sext <8 x i1> %3 to <8 x i16> 4218 ret <8 x i16> %4 4219} 4220 4221define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { 4222; SSE2-LABEL: ugt_10_v8i16: 4223; SSE2: # %bb.0: 4224; SSE2-NEXT: movdqa %xmm0, %xmm1 4225; SSE2-NEXT: psrlw $1, %xmm1 4226; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4227; SSE2-NEXT: psubb %xmm1, %xmm0 4228; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4229; SSE2-NEXT: movdqa %xmm0, %xmm2 4230; SSE2-NEXT: pand %xmm1, %xmm2 4231; SSE2-NEXT: psrlw $2, %xmm0 4232; SSE2-NEXT: pand %xmm1, %xmm0 4233; SSE2-NEXT: paddb %xmm2, %xmm0 4234; SSE2-NEXT: movdqa %xmm0, %xmm1 4235; SSE2-NEXT: psrlw $4, %xmm1 4236; SSE2-NEXT: paddb %xmm0, %xmm1 4237; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4238; SSE2-NEXT: movdqa %xmm1, %xmm0 4239; SSE2-NEXT: psllw $8, %xmm0 4240; SSE2-NEXT: paddb %xmm1, %xmm0 4241; SSE2-NEXT: psrlw $8, %xmm0 4242; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4243; SSE2-NEXT: retq 4244; 4245; SSE3-LABEL: ugt_10_v8i16: 4246; SSE3: # %bb.0: 4247; SSE3-NEXT: movdqa %xmm0, %xmm1 4248; SSE3-NEXT: psrlw $1, %xmm1 4249; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4250; SSE3-NEXT: psubb %xmm1, %xmm0 4251; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4252; SSE3-NEXT: movdqa %xmm0, %xmm2 4253; SSE3-NEXT: pand %xmm1, %xmm2 4254; SSE3-NEXT: psrlw $2, %xmm0 4255; SSE3-NEXT: pand %xmm1, %xmm0 4256; SSE3-NEXT: paddb %xmm2, %xmm0 4257; SSE3-NEXT: movdqa %xmm0, %xmm1 4258; SSE3-NEXT: psrlw $4, %xmm1 4259; SSE3-NEXT: paddb %xmm0, %xmm1 4260; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4261; SSE3-NEXT: movdqa %xmm1, %xmm0 4262; 
SSE3-NEXT: psllw $8, %xmm0 4263; SSE3-NEXT: paddb %xmm1, %xmm0 4264; SSE3-NEXT: psrlw $8, %xmm0 4265; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4266; SSE3-NEXT: retq 4267; 4268; SSSE3-LABEL: ugt_10_v8i16: 4269; SSSE3: # %bb.0: 4270; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4271; SSSE3-NEXT: movdqa %xmm0, %xmm2 4272; SSSE3-NEXT: pand %xmm1, %xmm2 4273; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4274; SSSE3-NEXT: movdqa %xmm3, %xmm4 4275; SSSE3-NEXT: pshufb %xmm2, %xmm4 4276; SSSE3-NEXT: psrlw $4, %xmm0 4277; SSSE3-NEXT: pand %xmm1, %xmm0 4278; SSSE3-NEXT: pshufb %xmm0, %xmm3 4279; SSSE3-NEXT: paddb %xmm4, %xmm3 4280; SSSE3-NEXT: movdqa %xmm3, %xmm0 4281; SSSE3-NEXT: psllw $8, %xmm0 4282; SSSE3-NEXT: paddb %xmm3, %xmm0 4283; SSSE3-NEXT: psrlw $8, %xmm0 4284; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4285; SSSE3-NEXT: retq 4286; 4287; SSE41-LABEL: ugt_10_v8i16: 4288; SSE41: # %bb.0: 4289; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4290; SSE41-NEXT: movdqa %xmm0, %xmm2 4291; SSE41-NEXT: pand %xmm1, %xmm2 4292; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4293; SSE41-NEXT: movdqa %xmm3, %xmm4 4294; SSE41-NEXT: pshufb %xmm2, %xmm4 4295; SSE41-NEXT: psrlw $4, %xmm0 4296; SSE41-NEXT: pand %xmm1, %xmm0 4297; SSE41-NEXT: pshufb %xmm0, %xmm3 4298; SSE41-NEXT: paddb %xmm4, %xmm3 4299; SSE41-NEXT: movdqa %xmm3, %xmm0 4300; SSE41-NEXT: psllw $8, %xmm0 4301; SSE41-NEXT: paddb %xmm3, %xmm0 4302; SSE41-NEXT: psrlw $8, %xmm0 4303; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4304; SSE41-NEXT: retq 4305; 4306; AVX1-LABEL: ugt_10_v8i16: 4307; AVX1: # %bb.0: 4308; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4309; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 4310; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4311; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4312; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 4313; 
AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 4314; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4315; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4316; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 4317; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4318; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4319; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4320; AVX1-NEXT: retq 4321; 4322; AVX2-LABEL: ugt_10_v8i16: 4323; AVX2: # %bb.0: 4324; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4325; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 4326; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4327; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4328; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 4329; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 4330; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4331; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4332; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 4333; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4334; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 4335; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4336; AVX2-NEXT: retq 4337; 4338; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i16: 4339; AVX512VPOPCNTDQ: # %bb.0: 4340; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4341; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 4342; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 4343; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4344; AVX512VPOPCNTDQ-NEXT: vzeroupper 4345; AVX512VPOPCNTDQ-NEXT: retq 4346; 4347; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i16: 4348; AVX512VPOPCNTDQVL: # %bb.0: 4349; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4350; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 4351; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 4352; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4353; AVX512VPOPCNTDQVL-NEXT: vzeroupper 4354; AVX512VPOPCNTDQVL-NEXT: retq 4355; 4356; 
BITALG_NOVLX-LABEL: ugt_10_v8i16: 4357; BITALG_NOVLX: # %bb.0: 4358; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4359; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 4360; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4361; BITALG_NOVLX-NEXT: vzeroupper 4362; BITALG_NOVLX-NEXT: retq 4363; 4364; BITALG-LABEL: ugt_10_v8i16: 4365; BITALG: # %bb.0: 4366; BITALG-NEXT: vpopcntw %xmm0, %xmm0 4367; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 4368; BITALG-NEXT: vpmovm2w %k0, %xmm0 4369; BITALG-NEXT: retq 4370 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 4371 %3 = icmp ugt <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> 4372 %4 = sext <8 x i1> %3 to <8 x i16> 4373 ret <8 x i16> %4 4374} 4375 4376define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { 4377; SSE2-LABEL: ult_11_v8i16: 4378; SSE2: # %bb.0: 4379; SSE2-NEXT: movdqa %xmm0, %xmm1 4380; SSE2-NEXT: psrlw $1, %xmm1 4381; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4382; SSE2-NEXT: psubb %xmm1, %xmm0 4383; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4384; SSE2-NEXT: movdqa %xmm0, %xmm2 4385; SSE2-NEXT: pand %xmm1, %xmm2 4386; SSE2-NEXT: psrlw $2, %xmm0 4387; SSE2-NEXT: pand %xmm1, %xmm0 4388; SSE2-NEXT: paddb %xmm2, %xmm0 4389; SSE2-NEXT: movdqa %xmm0, %xmm1 4390; SSE2-NEXT: psrlw $4, %xmm1 4391; SSE2-NEXT: paddb %xmm0, %xmm1 4392; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4393; SSE2-NEXT: movdqa %xmm1, %xmm2 4394; SSE2-NEXT: psllw $8, %xmm2 4395; SSE2-NEXT: paddb %xmm1, %xmm2 4396; SSE2-NEXT: psrlw $8, %xmm2 4397; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] 4398; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 4399; SSE2-NEXT: retq 4400; 4401; SSE3-LABEL: ult_11_v8i16: 4402; SSE3: # %bb.0: 4403; SSE3-NEXT: movdqa %xmm0, %xmm1 4404; SSE3-NEXT: psrlw $1, %xmm1 4405; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4406; SSE3-NEXT: psubb %xmm1, %xmm0 4407; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4408; 
SSE3-NEXT: movdqa %xmm0, %xmm2 4409; SSE3-NEXT: pand %xmm1, %xmm2 4410; SSE3-NEXT: psrlw $2, %xmm0 4411; SSE3-NEXT: pand %xmm1, %xmm0 4412; SSE3-NEXT: paddb %xmm2, %xmm0 4413; SSE3-NEXT: movdqa %xmm0, %xmm1 4414; SSE3-NEXT: psrlw $4, %xmm1 4415; SSE3-NEXT: paddb %xmm0, %xmm1 4416; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4417; SSE3-NEXT: movdqa %xmm1, %xmm2 4418; SSE3-NEXT: psllw $8, %xmm2 4419; SSE3-NEXT: paddb %xmm1, %xmm2 4420; SSE3-NEXT: psrlw $8, %xmm2 4421; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] 4422; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 4423; SSE3-NEXT: retq 4424; 4425; SSSE3-LABEL: ult_11_v8i16: 4426; SSSE3: # %bb.0: 4427; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4428; SSSE3-NEXT: movdqa %xmm0, %xmm2 4429; SSSE3-NEXT: pand %xmm1, %xmm2 4430; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4431; SSSE3-NEXT: movdqa %xmm3, %xmm4 4432; SSSE3-NEXT: pshufb %xmm2, %xmm4 4433; SSSE3-NEXT: psrlw $4, %xmm0 4434; SSSE3-NEXT: pand %xmm1, %xmm0 4435; SSSE3-NEXT: pshufb %xmm0, %xmm3 4436; SSSE3-NEXT: paddb %xmm4, %xmm3 4437; SSSE3-NEXT: movdqa %xmm3, %xmm1 4438; SSSE3-NEXT: psllw $8, %xmm1 4439; SSSE3-NEXT: paddb %xmm3, %xmm1 4440; SSSE3-NEXT: psrlw $8, %xmm1 4441; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] 4442; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 4443; SSSE3-NEXT: retq 4444; 4445; SSE41-LABEL: ult_11_v8i16: 4446; SSE41: # %bb.0: 4447; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4448; SSE41-NEXT: movdqa %xmm0, %xmm2 4449; SSE41-NEXT: pand %xmm1, %xmm2 4450; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4451; SSE41-NEXT: movdqa %xmm3, %xmm4 4452; SSE41-NEXT: pshufb %xmm2, %xmm4 4453; SSE41-NEXT: psrlw $4, %xmm0 4454; SSE41-NEXT: pand %xmm1, %xmm0 4455; SSE41-NEXT: pshufb %xmm0, %xmm3 4456; SSE41-NEXT: paddb %xmm4, %xmm3 4457; SSE41-NEXT: movdqa %xmm3, %xmm1 4458; SSE41-NEXT: psllw $8, %xmm1 4459; SSE41-NEXT: paddb 
%xmm3, %xmm1 4460; SSE41-NEXT: psrlw $8, %xmm1 4461; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] 4462; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 4463; SSE41-NEXT: retq 4464; 4465; AVX1-LABEL: ult_11_v8i16: 4466; AVX1: # %bb.0: 4467; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4468; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 4469; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4470; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4471; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 4472; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 4473; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4474; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4475; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 4476; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4477; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4478; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] 4479; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4480; AVX1-NEXT: retq 4481; 4482; AVX2-LABEL: ult_11_v8i16: 4483; AVX2: # %bb.0: 4484; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4485; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 4486; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4487; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4488; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 4489; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 4490; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4491; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4492; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 4493; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4494; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 4495; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] 4496; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4497; AVX2-NEXT: retq 4498; 4499; AVX512VPOPCNTDQ-LABEL: ult_11_v8i16: 4500; AVX512VPOPCNTDQ: # %bb.0: 4501; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4502; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 4503; AVX512VPOPCNTDQ-NEXT: vpmovdw 
%zmm0, %ymm0 4504; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] 4505; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4506; AVX512VPOPCNTDQ-NEXT: vzeroupper 4507; AVX512VPOPCNTDQ-NEXT: retq 4508; 4509; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i16: 4510; AVX512VPOPCNTDQVL: # %bb.0: 4511; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4512; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 4513; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 4514; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] 4515; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4516; AVX512VPOPCNTDQVL-NEXT: vzeroupper 4517; AVX512VPOPCNTDQVL-NEXT: retq 4518; 4519; BITALG_NOVLX-LABEL: ult_11_v8i16: 4520; BITALG_NOVLX: # %bb.0: 4521; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4522; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 4523; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] 4524; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4525; BITALG_NOVLX-NEXT: vzeroupper 4526; BITALG_NOVLX-NEXT: retq 4527; 4528; BITALG-LABEL: ult_11_v8i16: 4529; BITALG: # %bb.0: 4530; BITALG-NEXT: vpopcntw %xmm0, %xmm0 4531; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 4532; BITALG-NEXT: vpmovm2w %k0, %xmm0 4533; BITALG-NEXT: retq 4534 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 4535 %3 = icmp ult <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> 4536 %4 = sext <8 x i1> %3 to <8 x i16> 4537 ret <8 x i16> %4 4538} 4539 4540define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { 4541; SSE2-LABEL: ugt_11_v8i16: 4542; SSE2: # %bb.0: 4543; SSE2-NEXT: movdqa %xmm0, %xmm1 4544; SSE2-NEXT: psrlw $1, %xmm1 4545; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4546; SSE2-NEXT: psubb %xmm1, %xmm0 4547; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4548; SSE2-NEXT: movdqa %xmm0, %xmm2 
4549; SSE2-NEXT: pand %xmm1, %xmm2 4550; SSE2-NEXT: psrlw $2, %xmm0 4551; SSE2-NEXT: pand %xmm1, %xmm0 4552; SSE2-NEXT: paddb %xmm2, %xmm0 4553; SSE2-NEXT: movdqa %xmm0, %xmm1 4554; SSE2-NEXT: psrlw $4, %xmm1 4555; SSE2-NEXT: paddb %xmm0, %xmm1 4556; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4557; SSE2-NEXT: movdqa %xmm1, %xmm0 4558; SSE2-NEXT: psllw $8, %xmm0 4559; SSE2-NEXT: paddb %xmm1, %xmm0 4560; SSE2-NEXT: psrlw $8, %xmm0 4561; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4562; SSE2-NEXT: retq 4563; 4564; SSE3-LABEL: ugt_11_v8i16: 4565; SSE3: # %bb.0: 4566; SSE3-NEXT: movdqa %xmm0, %xmm1 4567; SSE3-NEXT: psrlw $1, %xmm1 4568; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4569; SSE3-NEXT: psubb %xmm1, %xmm0 4570; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4571; SSE3-NEXT: movdqa %xmm0, %xmm2 4572; SSE3-NEXT: pand %xmm1, %xmm2 4573; SSE3-NEXT: psrlw $2, %xmm0 4574; SSE3-NEXT: pand %xmm1, %xmm0 4575; SSE3-NEXT: paddb %xmm2, %xmm0 4576; SSE3-NEXT: movdqa %xmm0, %xmm1 4577; SSE3-NEXT: psrlw $4, %xmm1 4578; SSE3-NEXT: paddb %xmm0, %xmm1 4579; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4580; SSE3-NEXT: movdqa %xmm1, %xmm0 4581; SSE3-NEXT: psllw $8, %xmm0 4582; SSE3-NEXT: paddb %xmm1, %xmm0 4583; SSE3-NEXT: psrlw $8, %xmm0 4584; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4585; SSE3-NEXT: retq 4586; 4587; SSSE3-LABEL: ugt_11_v8i16: 4588; SSSE3: # %bb.0: 4589; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4590; SSSE3-NEXT: movdqa %xmm0, %xmm2 4591; SSSE3-NEXT: pand %xmm1, %xmm2 4592; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4593; SSSE3-NEXT: movdqa %xmm3, %xmm4 4594; SSSE3-NEXT: pshufb %xmm2, %xmm4 4595; SSSE3-NEXT: psrlw $4, %xmm0 4596; SSSE3-NEXT: pand %xmm1, %xmm0 4597; SSSE3-NEXT: pshufb %xmm0, %xmm3 4598; SSSE3-NEXT: paddb %xmm4, %xmm3 4599; SSSE3-NEXT: movdqa %xmm3, %xmm0 4600; SSSE3-NEXT: psllw $8, %xmm0 4601; SSSE3-NEXT: paddb %xmm3, %xmm0 4602; SSSE3-NEXT: psrlw $8, %xmm0 4603; 
SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4604; SSSE3-NEXT: retq 4605; 4606; SSE41-LABEL: ugt_11_v8i16: 4607; SSE41: # %bb.0: 4608; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4609; SSE41-NEXT: movdqa %xmm0, %xmm2 4610; SSE41-NEXT: pand %xmm1, %xmm2 4611; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4612; SSE41-NEXT: movdqa %xmm3, %xmm4 4613; SSE41-NEXT: pshufb %xmm2, %xmm4 4614; SSE41-NEXT: psrlw $4, %xmm0 4615; SSE41-NEXT: pand %xmm1, %xmm0 4616; SSE41-NEXT: pshufb %xmm0, %xmm3 4617; SSE41-NEXT: paddb %xmm4, %xmm3 4618; SSE41-NEXT: movdqa %xmm3, %xmm0 4619; SSE41-NEXT: psllw $8, %xmm0 4620; SSE41-NEXT: paddb %xmm3, %xmm0 4621; SSE41-NEXT: psrlw $8, %xmm0 4622; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4623; SSE41-NEXT: retq 4624; 4625; AVX1-LABEL: ugt_11_v8i16: 4626; AVX1: # %bb.0: 4627; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4628; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 4629; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4630; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4631; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 4632; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 4633; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4634; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4635; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 4636; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4637; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4638; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4639; AVX1-NEXT: retq 4640; 4641; AVX2-LABEL: ugt_11_v8i16: 4642; AVX2: # %bb.0: 4643; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4644; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 4645; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4646; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4647; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 4648; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 4649; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4650; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4651; AVX2-NEXT: vpsllw $8, 
%xmm0, %xmm1 4652; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4653; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 4654; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4655; AVX2-NEXT: retq 4656; 4657; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i16: 4658; AVX512VPOPCNTDQ: # %bb.0: 4659; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4660; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 4661; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 4662; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4663; AVX512VPOPCNTDQ-NEXT: vzeroupper 4664; AVX512VPOPCNTDQ-NEXT: retq 4665; 4666; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i16: 4667; AVX512VPOPCNTDQVL: # %bb.0: 4668; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4669; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 4670; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 4671; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4672; AVX512VPOPCNTDQVL-NEXT: vzeroupper 4673; AVX512VPOPCNTDQVL-NEXT: retq 4674; 4675; BITALG_NOVLX-LABEL: ugt_11_v8i16: 4676; BITALG_NOVLX: # %bb.0: 4677; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4678; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 4679; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4680; BITALG_NOVLX-NEXT: vzeroupper 4681; BITALG_NOVLX-NEXT: retq 4682; 4683; BITALG-LABEL: ugt_11_v8i16: 4684; BITALG: # %bb.0: 4685; BITALG-NEXT: vpopcntw %xmm0, %xmm0 4686; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 4687; BITALG-NEXT: vpmovm2w %k0, %xmm0 4688; BITALG-NEXT: retq 4689 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 4690 %3 = icmp ugt <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> 4691 %4 = sext <8 x i1> %3 to <8 x i16> 4692 ret <8 x i16> %4 4693} 4694 4695define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { 4696; SSE2-LABEL: ult_12_v8i16: 4697; 
SSE2: # %bb.0: 4698; SSE2-NEXT: movdqa %xmm0, %xmm1 4699; SSE2-NEXT: psrlw $1, %xmm1 4700; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4701; SSE2-NEXT: psubb %xmm1, %xmm0 4702; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4703; SSE2-NEXT: movdqa %xmm0, %xmm2 4704; SSE2-NEXT: pand %xmm1, %xmm2 4705; SSE2-NEXT: psrlw $2, %xmm0 4706; SSE2-NEXT: pand %xmm1, %xmm0 4707; SSE2-NEXT: paddb %xmm2, %xmm0 4708; SSE2-NEXT: movdqa %xmm0, %xmm1 4709; SSE2-NEXT: psrlw $4, %xmm1 4710; SSE2-NEXT: paddb %xmm0, %xmm1 4711; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4712; SSE2-NEXT: movdqa %xmm1, %xmm2 4713; SSE2-NEXT: psllw $8, %xmm2 4714; SSE2-NEXT: paddb %xmm1, %xmm2 4715; SSE2-NEXT: psrlw $8, %xmm2 4716; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] 4717; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 4718; SSE2-NEXT: retq 4719; 4720; SSE3-LABEL: ult_12_v8i16: 4721; SSE3: # %bb.0: 4722; SSE3-NEXT: movdqa %xmm0, %xmm1 4723; SSE3-NEXT: psrlw $1, %xmm1 4724; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4725; SSE3-NEXT: psubb %xmm1, %xmm0 4726; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4727; SSE3-NEXT: movdqa %xmm0, %xmm2 4728; SSE3-NEXT: pand %xmm1, %xmm2 4729; SSE3-NEXT: psrlw $2, %xmm0 4730; SSE3-NEXT: pand %xmm1, %xmm0 4731; SSE3-NEXT: paddb %xmm2, %xmm0 4732; SSE3-NEXT: movdqa %xmm0, %xmm1 4733; SSE3-NEXT: psrlw $4, %xmm1 4734; SSE3-NEXT: paddb %xmm0, %xmm1 4735; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4736; SSE3-NEXT: movdqa %xmm1, %xmm2 4737; SSE3-NEXT: psllw $8, %xmm2 4738; SSE3-NEXT: paddb %xmm1, %xmm2 4739; SSE3-NEXT: psrlw $8, %xmm2 4740; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] 4741; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 4742; SSE3-NEXT: retq 4743; 4744; SSSE3-LABEL: ult_12_v8i16: 4745; SSSE3: # %bb.0: 4746; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4747; SSSE3-NEXT: movdqa %xmm0, %xmm2 4748; SSSE3-NEXT: pand %xmm1, %xmm2 4749; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4750; SSSE3-NEXT: movdqa %xmm3, %xmm4 4751; SSSE3-NEXT: pshufb %xmm2, %xmm4 4752; SSSE3-NEXT: psrlw $4, %xmm0 4753; SSSE3-NEXT: pand %xmm1, %xmm0 4754; SSSE3-NEXT: pshufb %xmm0, %xmm3 4755; SSSE3-NEXT: paddb %xmm4, %xmm3 4756; SSSE3-NEXT: movdqa %xmm3, %xmm1 4757; SSSE3-NEXT: psllw $8, %xmm1 4758; SSSE3-NEXT: paddb %xmm3, %xmm1 4759; SSSE3-NEXT: psrlw $8, %xmm1 4760; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] 4761; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 4762; SSSE3-NEXT: retq 4763; 4764; SSE41-LABEL: ult_12_v8i16: 4765; SSE41: # %bb.0: 4766; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4767; SSE41-NEXT: movdqa %xmm0, %xmm2 4768; SSE41-NEXT: pand %xmm1, %xmm2 4769; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4770; SSE41-NEXT: movdqa %xmm3, %xmm4 4771; SSE41-NEXT: pshufb %xmm2, %xmm4 4772; SSE41-NEXT: psrlw $4, %xmm0 4773; SSE41-NEXT: pand %xmm1, %xmm0 4774; SSE41-NEXT: pshufb %xmm0, %xmm3 4775; SSE41-NEXT: paddb %xmm4, %xmm3 4776; SSE41-NEXT: movdqa %xmm3, %xmm1 4777; SSE41-NEXT: psllw $8, %xmm1 4778; SSE41-NEXT: paddb %xmm3, %xmm1 4779; SSE41-NEXT: psrlw $8, %xmm1 4780; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] 4781; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 4782; SSE41-NEXT: retq 4783; 4784; AVX1-LABEL: ult_12_v8i16: 4785; AVX1: # %bb.0: 4786; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4787; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 4788; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4789; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4790; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 4791; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 4792; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4793; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4794; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 4795; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4796; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4797; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] 
4798; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4799; AVX1-NEXT: retq 4800; 4801; AVX2-LABEL: ult_12_v8i16: 4802; AVX2: # %bb.0: 4803; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4804; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 4805; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4806; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4807; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 4808; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 4809; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4810; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4811; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 4812; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4813; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 4814; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] 4815; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4816; AVX2-NEXT: retq 4817; 4818; AVX512VPOPCNTDQ-LABEL: ult_12_v8i16: 4819; AVX512VPOPCNTDQ: # %bb.0: 4820; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4821; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 4822; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 4823; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] 4824; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4825; AVX512VPOPCNTDQ-NEXT: vzeroupper 4826; AVX512VPOPCNTDQ-NEXT: retq 4827; 4828; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i16: 4829; AVX512VPOPCNTDQVL: # %bb.0: 4830; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4831; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 4832; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 4833; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] 4834; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4835; AVX512VPOPCNTDQVL-NEXT: vzeroupper 4836; AVX512VPOPCNTDQVL-NEXT: retq 4837; 4838; BITALG_NOVLX-LABEL: ult_12_v8i16: 4839; BITALG_NOVLX: # %bb.0: 
4840; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4841; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 4842; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] 4843; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 4844; BITALG_NOVLX-NEXT: vzeroupper 4845; BITALG_NOVLX-NEXT: retq 4846; 4847; BITALG-LABEL: ult_12_v8i16: 4848; BITALG: # %bb.0: 4849; BITALG-NEXT: vpopcntw %xmm0, %xmm0 4850; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 4851; BITALG-NEXT: vpmovm2w %k0, %xmm0 4852; BITALG-NEXT: retq 4853 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 4854 %3 = icmp ult <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12> 4855 %4 = sext <8 x i1> %3 to <8 x i16> 4856 ret <8 x i16> %4 4857} 4858 4859define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { 4860; SSE2-LABEL: ugt_12_v8i16: 4861; SSE2: # %bb.0: 4862; SSE2-NEXT: movdqa %xmm0, %xmm1 4863; SSE2-NEXT: psrlw $1, %xmm1 4864; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4865; SSE2-NEXT: psubb %xmm1, %xmm0 4866; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4867; SSE2-NEXT: movdqa %xmm0, %xmm2 4868; SSE2-NEXT: pand %xmm1, %xmm2 4869; SSE2-NEXT: psrlw $2, %xmm0 4870; SSE2-NEXT: pand %xmm1, %xmm0 4871; SSE2-NEXT: paddb %xmm2, %xmm0 4872; SSE2-NEXT: movdqa %xmm0, %xmm1 4873; SSE2-NEXT: psrlw $4, %xmm1 4874; SSE2-NEXT: paddb %xmm0, %xmm1 4875; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4876; SSE2-NEXT: movdqa %xmm1, %xmm0 4877; SSE2-NEXT: psllw $8, %xmm0 4878; SSE2-NEXT: paddb %xmm1, %xmm0 4879; SSE2-NEXT: psrlw $8, %xmm0 4880; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4881; SSE2-NEXT: retq 4882; 4883; SSE3-LABEL: ugt_12_v8i16: 4884; SSE3: # %bb.0: 4885; SSE3-NEXT: movdqa %xmm0, %xmm1 4886; SSE3-NEXT: psrlw $1, %xmm1 4887; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4888; SSE3-NEXT: psubb %xmm1, %xmm0 4889; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 4890; SSE3-NEXT: movdqa %xmm0, %xmm2 4891; SSE3-NEXT: pand %xmm1, 
%xmm2 4892; SSE3-NEXT: psrlw $2, %xmm0 4893; SSE3-NEXT: pand %xmm1, %xmm0 4894; SSE3-NEXT: paddb %xmm2, %xmm0 4895; SSE3-NEXT: movdqa %xmm0, %xmm1 4896; SSE3-NEXT: psrlw $4, %xmm1 4897; SSE3-NEXT: paddb %xmm0, %xmm1 4898; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 4899; SSE3-NEXT: movdqa %xmm1, %xmm0 4900; SSE3-NEXT: psllw $8, %xmm0 4901; SSE3-NEXT: paddb %xmm1, %xmm0 4902; SSE3-NEXT: psrlw $8, %xmm0 4903; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4904; SSE3-NEXT: retq 4905; 4906; SSSE3-LABEL: ugt_12_v8i16: 4907; SSSE3: # %bb.0: 4908; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4909; SSSE3-NEXT: movdqa %xmm0, %xmm2 4910; SSSE3-NEXT: pand %xmm1, %xmm2 4911; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4912; SSSE3-NEXT: movdqa %xmm3, %xmm4 4913; SSSE3-NEXT: pshufb %xmm2, %xmm4 4914; SSSE3-NEXT: psrlw $4, %xmm0 4915; SSSE3-NEXT: pand %xmm1, %xmm0 4916; SSSE3-NEXT: pshufb %xmm0, %xmm3 4917; SSSE3-NEXT: paddb %xmm4, %xmm3 4918; SSSE3-NEXT: movdqa %xmm3, %xmm0 4919; SSSE3-NEXT: psllw $8, %xmm0 4920; SSSE3-NEXT: paddb %xmm3, %xmm0 4921; SSSE3-NEXT: psrlw $8, %xmm0 4922; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4923; SSSE3-NEXT: retq 4924; 4925; SSE41-LABEL: ugt_12_v8i16: 4926; SSE41: # %bb.0: 4927; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4928; SSE41-NEXT: movdqa %xmm0, %xmm2 4929; SSE41-NEXT: pand %xmm1, %xmm2 4930; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4931; SSE41-NEXT: movdqa %xmm3, %xmm4 4932; SSE41-NEXT: pshufb %xmm2, %xmm4 4933; SSE41-NEXT: psrlw $4, %xmm0 4934; SSE41-NEXT: pand %xmm1, %xmm0 4935; SSE41-NEXT: pshufb %xmm0, %xmm3 4936; SSE41-NEXT: paddb %xmm4, %xmm3 4937; SSE41-NEXT: movdqa %xmm3, %xmm0 4938; SSE41-NEXT: psllw $8, %xmm0 4939; SSE41-NEXT: paddb %xmm3, %xmm0 4940; SSE41-NEXT: psrlw $8, %xmm0 4941; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 4942; SSE41-NEXT: retq 4943; 4944; AVX1-LABEL: ugt_12_v8i16: 4945; AVX1: # %bb.0: 4946; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4947; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 4948; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4949; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4950; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 4951; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 4952; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4953; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4954; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 4955; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4956; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4957; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4958; AVX1-NEXT: retq 4959; 4960; AVX2-LABEL: ugt_12_v8i16: 4961; AVX2: # %bb.0: 4962; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4963; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 4964; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 4965; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 4966; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 4967; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 4968; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 4969; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4970; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 4971; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 4972; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 4973; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4974; AVX2-NEXT: retq 4975; 4976; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i16: 4977; AVX512VPOPCNTDQ: # %bb.0: 4978; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4979; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 4980; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 4981; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4982; AVX512VPOPCNTDQ-NEXT: vzeroupper 4983; AVX512VPOPCNTDQ-NEXT: retq 4984; 4985; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i16: 4986; AVX512VPOPCNTDQVL: # %bb.0: 4987; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4988; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 4989; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 4990; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4991; AVX512VPOPCNTDQVL-NEXT: vzeroupper 4992; AVX512VPOPCNTDQVL-NEXT: retq 4993; 4994; BITALG_NOVLX-LABEL: ugt_12_v8i16: 4995; BITALG_NOVLX: # %bb.0: 4996; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4997; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 4998; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 4999; BITALG_NOVLX-NEXT: vzeroupper 5000; BITALG_NOVLX-NEXT: retq 5001; 5002; BITALG-LABEL: ugt_12_v8i16: 5003; BITALG: # %bb.0: 5004; BITALG-NEXT: vpopcntw %xmm0, %xmm0 5005; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 5006; BITALG-NEXT: vpmovm2w %k0, %xmm0 5007; BITALG-NEXT: retq 5008 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 5009 %3 = icmp ugt <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12> 5010 %4 = sext <8 x i1> %3 to <8 x i16> 5011 ret <8 x i16> %4 5012} 5013 5014define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { 5015; SSE2-LABEL: ult_13_v8i16: 5016; SSE2: # %bb.0: 5017; SSE2-NEXT: movdqa %xmm0, %xmm1 5018; SSE2-NEXT: psrlw $1, %xmm1 5019; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5020; SSE2-NEXT: psubb %xmm1, %xmm0 5021; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5022; SSE2-NEXT: movdqa %xmm0, %xmm2 5023; SSE2-NEXT: pand %xmm1, %xmm2 5024; SSE2-NEXT: psrlw $2, %xmm0 5025; SSE2-NEXT: pand %xmm1, %xmm0 5026; SSE2-NEXT: paddb %xmm2, %xmm0 5027; SSE2-NEXT: movdqa %xmm0, %xmm1 5028; SSE2-NEXT: psrlw $4, %xmm1 5029; SSE2-NEXT: paddb %xmm0, %xmm1 5030; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5031; SSE2-NEXT: movdqa %xmm1, %xmm2 5032; SSE2-NEXT: psllw $8, %xmm2 5033; SSE2-NEXT: paddb %xmm1, %xmm2 5034; SSE2-NEXT: psrlw $8, %xmm2 5035; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] 5036; 
SSE2-NEXT: pcmpgtw %xmm2, %xmm0 5037; SSE2-NEXT: retq 5038; 5039; SSE3-LABEL: ult_13_v8i16: 5040; SSE3: # %bb.0: 5041; SSE3-NEXT: movdqa %xmm0, %xmm1 5042; SSE3-NEXT: psrlw $1, %xmm1 5043; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5044; SSE3-NEXT: psubb %xmm1, %xmm0 5045; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5046; SSE3-NEXT: movdqa %xmm0, %xmm2 5047; SSE3-NEXT: pand %xmm1, %xmm2 5048; SSE3-NEXT: psrlw $2, %xmm0 5049; SSE3-NEXT: pand %xmm1, %xmm0 5050; SSE3-NEXT: paddb %xmm2, %xmm0 5051; SSE3-NEXT: movdqa %xmm0, %xmm1 5052; SSE3-NEXT: psrlw $4, %xmm1 5053; SSE3-NEXT: paddb %xmm0, %xmm1 5054; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5055; SSE3-NEXT: movdqa %xmm1, %xmm2 5056; SSE3-NEXT: psllw $8, %xmm2 5057; SSE3-NEXT: paddb %xmm1, %xmm2 5058; SSE3-NEXT: psrlw $8, %xmm2 5059; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] 5060; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 5061; SSE3-NEXT: retq 5062; 5063; SSSE3-LABEL: ult_13_v8i16: 5064; SSSE3: # %bb.0: 5065; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5066; SSSE3-NEXT: movdqa %xmm0, %xmm2 5067; SSSE3-NEXT: pand %xmm1, %xmm2 5068; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5069; SSSE3-NEXT: movdqa %xmm3, %xmm4 5070; SSSE3-NEXT: pshufb %xmm2, %xmm4 5071; SSSE3-NEXT: psrlw $4, %xmm0 5072; SSSE3-NEXT: pand %xmm1, %xmm0 5073; SSSE3-NEXT: pshufb %xmm0, %xmm3 5074; SSSE3-NEXT: paddb %xmm4, %xmm3 5075; SSSE3-NEXT: movdqa %xmm3, %xmm1 5076; SSSE3-NEXT: psllw $8, %xmm1 5077; SSSE3-NEXT: paddb %xmm3, %xmm1 5078; SSSE3-NEXT: psrlw $8, %xmm1 5079; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] 5080; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 5081; SSSE3-NEXT: retq 5082; 5083; SSE41-LABEL: ult_13_v8i16: 5084; SSE41: # %bb.0: 5085; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5086; SSE41-NEXT: movdqa %xmm0, %xmm2 5087; SSE41-NEXT: pand %xmm1, %xmm2 5088; SSE41-NEXT: movdqa {{.*#+}} xmm3 
= [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5089; SSE41-NEXT: movdqa %xmm3, %xmm4 5090; SSE41-NEXT: pshufb %xmm2, %xmm4 5091; SSE41-NEXT: psrlw $4, %xmm0 5092; SSE41-NEXT: pand %xmm1, %xmm0 5093; SSE41-NEXT: pshufb %xmm0, %xmm3 5094; SSE41-NEXT: paddb %xmm4, %xmm3 5095; SSE41-NEXT: movdqa %xmm3, %xmm1 5096; SSE41-NEXT: psllw $8, %xmm1 5097; SSE41-NEXT: paddb %xmm3, %xmm1 5098; SSE41-NEXT: psrlw $8, %xmm1 5099; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] 5100; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 5101; SSE41-NEXT: retq 5102; 5103; AVX1-LABEL: ult_13_v8i16: 5104; AVX1: # %bb.0: 5105; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5106; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 5107; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5108; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5109; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 5110; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 5111; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5112; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5113; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 5114; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5115; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 5116; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] 5117; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5118; AVX1-NEXT: retq 5119; 5120; AVX2-LABEL: ult_13_v8i16: 5121; AVX2: # %bb.0: 5122; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5123; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 5124; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5125; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5126; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 5127; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 5128; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5129; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5130; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 5131; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5132; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 5133; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] 5134; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, 
%xmm0 5135; AVX2-NEXT: retq 5136; 5137; AVX512VPOPCNTDQ-LABEL: ult_13_v8i16: 5138; AVX512VPOPCNTDQ: # %bb.0: 5139; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5140; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 5141; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 5142; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] 5143; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5144; AVX512VPOPCNTDQ-NEXT: vzeroupper 5145; AVX512VPOPCNTDQ-NEXT: retq 5146; 5147; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i16: 5148; AVX512VPOPCNTDQVL: # %bb.0: 5149; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5150; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 5151; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 5152; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] 5153; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5154; AVX512VPOPCNTDQVL-NEXT: vzeroupper 5155; AVX512VPOPCNTDQVL-NEXT: retq 5156; 5157; BITALG_NOVLX-LABEL: ult_13_v8i16: 5158; BITALG_NOVLX: # %bb.0: 5159; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5160; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 5161; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] 5162; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5163; BITALG_NOVLX-NEXT: vzeroupper 5164; BITALG_NOVLX-NEXT: retq 5165; 5166; BITALG-LABEL: ult_13_v8i16: 5167; BITALG: # %bb.0: 5168; BITALG-NEXT: vpopcntw %xmm0, %xmm0 5169; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 5170; BITALG-NEXT: vpmovm2w %k0, %xmm0 5171; BITALG-NEXT: retq 5172 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 5173 %3 = icmp ult <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13> 5174 %4 = sext <8 x i1> %3 to <8 x i16> 5175 ret <8 x i16> %4 5176} 5177 5178define <8 x i16> 
@ugt_13_v8i16(<8 x i16> %0) { 5179; SSE2-LABEL: ugt_13_v8i16: 5180; SSE2: # %bb.0: 5181; SSE2-NEXT: movdqa %xmm0, %xmm1 5182; SSE2-NEXT: psrlw $1, %xmm1 5183; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5184; SSE2-NEXT: psubb %xmm1, %xmm0 5185; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5186; SSE2-NEXT: movdqa %xmm0, %xmm2 5187; SSE2-NEXT: pand %xmm1, %xmm2 5188; SSE2-NEXT: psrlw $2, %xmm0 5189; SSE2-NEXT: pand %xmm1, %xmm0 5190; SSE2-NEXT: paddb %xmm2, %xmm0 5191; SSE2-NEXT: movdqa %xmm0, %xmm1 5192; SSE2-NEXT: psrlw $4, %xmm1 5193; SSE2-NEXT: paddb %xmm0, %xmm1 5194; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5195; SSE2-NEXT: movdqa %xmm1, %xmm0 5196; SSE2-NEXT: psllw $8, %xmm0 5197; SSE2-NEXT: paddb %xmm1, %xmm0 5198; SSE2-NEXT: psrlw $8, %xmm0 5199; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5200; SSE2-NEXT: retq 5201; 5202; SSE3-LABEL: ugt_13_v8i16: 5203; SSE3: # %bb.0: 5204; SSE3-NEXT: movdqa %xmm0, %xmm1 5205; SSE3-NEXT: psrlw $1, %xmm1 5206; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5207; SSE3-NEXT: psubb %xmm1, %xmm0 5208; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5209; SSE3-NEXT: movdqa %xmm0, %xmm2 5210; SSE3-NEXT: pand %xmm1, %xmm2 5211; SSE3-NEXT: psrlw $2, %xmm0 5212; SSE3-NEXT: pand %xmm1, %xmm0 5213; SSE3-NEXT: paddb %xmm2, %xmm0 5214; SSE3-NEXT: movdqa %xmm0, %xmm1 5215; SSE3-NEXT: psrlw $4, %xmm1 5216; SSE3-NEXT: paddb %xmm0, %xmm1 5217; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5218; SSE3-NEXT: movdqa %xmm1, %xmm0 5219; SSE3-NEXT: psllw $8, %xmm0 5220; SSE3-NEXT: paddb %xmm1, %xmm0 5221; SSE3-NEXT: psrlw $8, %xmm0 5222; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5223; SSE3-NEXT: retq 5224; 5225; SSSE3-LABEL: ugt_13_v8i16: 5226; SSSE3: # %bb.0: 5227; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5228; SSSE3-NEXT: movdqa %xmm0, %xmm2 5229; SSSE3-NEXT: pand %xmm1, %xmm2 5230; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5231; SSSE3-NEXT: 
movdqa %xmm3, %xmm4 5232; SSSE3-NEXT: pshufb %xmm2, %xmm4 5233; SSSE3-NEXT: psrlw $4, %xmm0 5234; SSSE3-NEXT: pand %xmm1, %xmm0 5235; SSSE3-NEXT: pshufb %xmm0, %xmm3 5236; SSSE3-NEXT: paddb %xmm4, %xmm3 5237; SSSE3-NEXT: movdqa %xmm3, %xmm0 5238; SSSE3-NEXT: psllw $8, %xmm0 5239; SSSE3-NEXT: paddb %xmm3, %xmm0 5240; SSSE3-NEXT: psrlw $8, %xmm0 5241; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5242; SSSE3-NEXT: retq 5243; 5244; SSE41-LABEL: ugt_13_v8i16: 5245; SSE41: # %bb.0: 5246; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5247; SSE41-NEXT: movdqa %xmm0, %xmm2 5248; SSE41-NEXT: pand %xmm1, %xmm2 5249; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5250; SSE41-NEXT: movdqa %xmm3, %xmm4 5251; SSE41-NEXT: pshufb %xmm2, %xmm4 5252; SSE41-NEXT: psrlw $4, %xmm0 5253; SSE41-NEXT: pand %xmm1, %xmm0 5254; SSE41-NEXT: pshufb %xmm0, %xmm3 5255; SSE41-NEXT: paddb %xmm4, %xmm3 5256; SSE41-NEXT: movdqa %xmm3, %xmm0 5257; SSE41-NEXT: psllw $8, %xmm0 5258; SSE41-NEXT: paddb %xmm3, %xmm0 5259; SSE41-NEXT: psrlw $8, %xmm0 5260; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5261; SSE41-NEXT: retq 5262; 5263; AVX1-LABEL: ugt_13_v8i16: 5264; AVX1: # %bb.0: 5265; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5266; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 5267; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5268; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5269; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 5270; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 5271; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5272; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5273; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 5274; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5275; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 5276; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5277; AVX1-NEXT: retq 5278; 5279; AVX2-LABEL: ugt_13_v8i16: 5280; AVX2: # %bb.0: 5281; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5282; 
AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 5283; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5284; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5285; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 5286; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 5287; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5288; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5289; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 5290; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5291; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 5292; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5293; AVX2-NEXT: retq 5294; 5295; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i16: 5296; AVX512VPOPCNTDQ: # %bb.0: 5297; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5298; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 5299; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 5300; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5301; AVX512VPOPCNTDQ-NEXT: vzeroupper 5302; AVX512VPOPCNTDQ-NEXT: retq 5303; 5304; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i16: 5305; AVX512VPOPCNTDQVL: # %bb.0: 5306; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5307; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 5308; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 5309; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5310; AVX512VPOPCNTDQVL-NEXT: vzeroupper 5311; AVX512VPOPCNTDQVL-NEXT: retq 5312; 5313; BITALG_NOVLX-LABEL: ugt_13_v8i16: 5314; BITALG_NOVLX: # %bb.0: 5315; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5316; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 5317; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5318; BITALG_NOVLX-NEXT: vzeroupper 5319; BITALG_NOVLX-NEXT: retq 5320; 5321; BITALG-LABEL: ugt_13_v8i16: 5322; BITALG: # %bb.0: 5323; BITALG-NEXT: vpopcntw %xmm0, %xmm0 5324; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 5325; BITALG-NEXT: vpmovm2w 
%k0, %xmm0 5326; BITALG-NEXT: retq 5327 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 5328 %3 = icmp ugt <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13> 5329 %4 = sext <8 x i1> %3 to <8 x i16> 5330 ret <8 x i16> %4 5331} 5332 5333define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { 5334; SSE2-LABEL: ult_14_v8i16: 5335; SSE2: # %bb.0: 5336; SSE2-NEXT: movdqa %xmm0, %xmm1 5337; SSE2-NEXT: psrlw $1, %xmm1 5338; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5339; SSE2-NEXT: psubb %xmm1, %xmm0 5340; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5341; SSE2-NEXT: movdqa %xmm0, %xmm2 5342; SSE2-NEXT: pand %xmm1, %xmm2 5343; SSE2-NEXT: psrlw $2, %xmm0 5344; SSE2-NEXT: pand %xmm1, %xmm0 5345; SSE2-NEXT: paddb %xmm2, %xmm0 5346; SSE2-NEXT: movdqa %xmm0, %xmm1 5347; SSE2-NEXT: psrlw $4, %xmm1 5348; SSE2-NEXT: paddb %xmm0, %xmm1 5349; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5350; SSE2-NEXT: movdqa %xmm1, %xmm2 5351; SSE2-NEXT: psllw $8, %xmm2 5352; SSE2-NEXT: paddb %xmm1, %xmm2 5353; SSE2-NEXT: psrlw $8, %xmm2 5354; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] 5355; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 5356; SSE2-NEXT: retq 5357; 5358; SSE3-LABEL: ult_14_v8i16: 5359; SSE3: # %bb.0: 5360; SSE3-NEXT: movdqa %xmm0, %xmm1 5361; SSE3-NEXT: psrlw $1, %xmm1 5362; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5363; SSE3-NEXT: psubb %xmm1, %xmm0 5364; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5365; SSE3-NEXT: movdqa %xmm0, %xmm2 5366; SSE3-NEXT: pand %xmm1, %xmm2 5367; SSE3-NEXT: psrlw $2, %xmm0 5368; SSE3-NEXT: pand %xmm1, %xmm0 5369; SSE3-NEXT: paddb %xmm2, %xmm0 5370; SSE3-NEXT: movdqa %xmm0, %xmm1 5371; SSE3-NEXT: psrlw $4, %xmm1 5372; SSE3-NEXT: paddb %xmm0, %xmm1 5373; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5374; SSE3-NEXT: movdqa %xmm1, %xmm2 5375; SSE3-NEXT: psllw $8, %xmm2 5376; SSE3-NEXT: paddb %xmm1, %xmm2 5377; SSE3-NEXT: psrlw $8, %xmm2 5378; SSE3-NEXT: movdqa {{.*#+}} xmm0 = 
[14,14,14,14,14,14,14,14] 5379; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 5380; SSE3-NEXT: retq 5381; 5382; SSSE3-LABEL: ult_14_v8i16: 5383; SSSE3: # %bb.0: 5384; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5385; SSSE3-NEXT: movdqa %xmm0, %xmm2 5386; SSSE3-NEXT: pand %xmm1, %xmm2 5387; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5388; SSSE3-NEXT: movdqa %xmm3, %xmm4 5389; SSSE3-NEXT: pshufb %xmm2, %xmm4 5390; SSSE3-NEXT: psrlw $4, %xmm0 5391; SSSE3-NEXT: pand %xmm1, %xmm0 5392; SSSE3-NEXT: pshufb %xmm0, %xmm3 5393; SSSE3-NEXT: paddb %xmm4, %xmm3 5394; SSSE3-NEXT: movdqa %xmm3, %xmm1 5395; SSSE3-NEXT: psllw $8, %xmm1 5396; SSSE3-NEXT: paddb %xmm3, %xmm1 5397; SSSE3-NEXT: psrlw $8, %xmm1 5398; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] 5399; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 5400; SSSE3-NEXT: retq 5401; 5402; SSE41-LABEL: ult_14_v8i16: 5403; SSE41: # %bb.0: 5404; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5405; SSE41-NEXT: movdqa %xmm0, %xmm2 5406; SSE41-NEXT: pand %xmm1, %xmm2 5407; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5408; SSE41-NEXT: movdqa %xmm3, %xmm4 5409; SSE41-NEXT: pshufb %xmm2, %xmm4 5410; SSE41-NEXT: psrlw $4, %xmm0 5411; SSE41-NEXT: pand %xmm1, %xmm0 5412; SSE41-NEXT: pshufb %xmm0, %xmm3 5413; SSE41-NEXT: paddb %xmm4, %xmm3 5414; SSE41-NEXT: movdqa %xmm3, %xmm1 5415; SSE41-NEXT: psllw $8, %xmm1 5416; SSE41-NEXT: paddb %xmm3, %xmm1 5417; SSE41-NEXT: psrlw $8, %xmm1 5418; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] 5419; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 5420; SSE41-NEXT: retq 5421; 5422; AVX1-LABEL: ult_14_v8i16: 5423; AVX1: # %bb.0: 5424; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5425; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 5426; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5427; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5428; 
AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 5429; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 5430; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5431; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5432; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 5433; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5434; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 5435; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] 5436; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5437; AVX1-NEXT: retq 5438; 5439; AVX2-LABEL: ult_14_v8i16: 5440; AVX2: # %bb.0: 5441; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5442; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 5443; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5444; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5445; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 5446; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 5447; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5448; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5449; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 5450; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5451; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 5452; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] 5453; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5454; AVX2-NEXT: retq 5455; 5456; AVX512VPOPCNTDQ-LABEL: ult_14_v8i16: 5457; AVX512VPOPCNTDQ: # %bb.0: 5458; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5459; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 5460; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 5461; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] 5462; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5463; AVX512VPOPCNTDQ-NEXT: vzeroupper 5464; AVX512VPOPCNTDQ-NEXT: retq 5465; 5466; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i16: 5467; AVX512VPOPCNTDQVL: # %bb.0: 5468; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5469; AVX512VPOPCNTDQVL-NEXT: 
vpopcntd %ymm0, %ymm0 5470; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 5471; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] 5472; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5473; AVX512VPOPCNTDQVL-NEXT: vzeroupper 5474; AVX512VPOPCNTDQVL-NEXT: retq 5475; 5476; BITALG_NOVLX-LABEL: ult_14_v8i16: 5477; BITALG_NOVLX: # %bb.0: 5478; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5479; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 5480; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] 5481; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5482; BITALG_NOVLX-NEXT: vzeroupper 5483; BITALG_NOVLX-NEXT: retq 5484; 5485; BITALG-LABEL: ult_14_v8i16: 5486; BITALG: # %bb.0: 5487; BITALG-NEXT: vpopcntw %xmm0, %xmm0 5488; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 5489; BITALG-NEXT: vpmovm2w %k0, %xmm0 5490; BITALG-NEXT: retq 5491 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 5492 %3 = icmp ult <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14> 5493 %4 = sext <8 x i1> %3 to <8 x i16> 5494 ret <8 x i16> %4 5495} 5496 5497define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { 5498; SSE2-LABEL: ugt_14_v8i16: 5499; SSE2: # %bb.0: 5500; SSE2-NEXT: movdqa %xmm0, %xmm1 5501; SSE2-NEXT: psrlw $1, %xmm1 5502; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5503; SSE2-NEXT: psubb %xmm1, %xmm0 5504; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5505; SSE2-NEXT: movdqa %xmm0, %xmm2 5506; SSE2-NEXT: pand %xmm1, %xmm2 5507; SSE2-NEXT: psrlw $2, %xmm0 5508; SSE2-NEXT: pand %xmm1, %xmm0 5509; SSE2-NEXT: paddb %xmm2, %xmm0 5510; SSE2-NEXT: movdqa %xmm0, %xmm1 5511; SSE2-NEXT: psrlw $4, %xmm1 5512; SSE2-NEXT: paddb %xmm0, %xmm1 5513; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5514; SSE2-NEXT: movdqa %xmm1, %xmm0 5515; SSE2-NEXT: psllw $8, %xmm0 5516; SSE2-NEXT: paddb %xmm1, %xmm0 5517; SSE2-NEXT: psrlw $8, %xmm0 5518; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5519; SSE2-NEXT: retq 
5520; 5521; SSE3-LABEL: ugt_14_v8i16: 5522; SSE3: # %bb.0: 5523; SSE3-NEXT: movdqa %xmm0, %xmm1 5524; SSE3-NEXT: psrlw $1, %xmm1 5525; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5526; SSE3-NEXT: psubb %xmm1, %xmm0 5527; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5528; SSE3-NEXT: movdqa %xmm0, %xmm2 5529; SSE3-NEXT: pand %xmm1, %xmm2 5530; SSE3-NEXT: psrlw $2, %xmm0 5531; SSE3-NEXT: pand %xmm1, %xmm0 5532; SSE3-NEXT: paddb %xmm2, %xmm0 5533; SSE3-NEXT: movdqa %xmm0, %xmm1 5534; SSE3-NEXT: psrlw $4, %xmm1 5535; SSE3-NEXT: paddb %xmm0, %xmm1 5536; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5537; SSE3-NEXT: movdqa %xmm1, %xmm0 5538; SSE3-NEXT: psllw $8, %xmm0 5539; SSE3-NEXT: paddb %xmm1, %xmm0 5540; SSE3-NEXT: psrlw $8, %xmm0 5541; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5542; SSE3-NEXT: retq 5543; 5544; SSSE3-LABEL: ugt_14_v8i16: 5545; SSSE3: # %bb.0: 5546; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5547; SSSE3-NEXT: movdqa %xmm0, %xmm2 5548; SSSE3-NEXT: pand %xmm1, %xmm2 5549; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5550; SSSE3-NEXT: movdqa %xmm3, %xmm4 5551; SSSE3-NEXT: pshufb %xmm2, %xmm4 5552; SSSE3-NEXT: psrlw $4, %xmm0 5553; SSSE3-NEXT: pand %xmm1, %xmm0 5554; SSSE3-NEXT: pshufb %xmm0, %xmm3 5555; SSSE3-NEXT: paddb %xmm4, %xmm3 5556; SSSE3-NEXT: movdqa %xmm3, %xmm0 5557; SSSE3-NEXT: psllw $8, %xmm0 5558; SSSE3-NEXT: paddb %xmm3, %xmm0 5559; SSSE3-NEXT: psrlw $8, %xmm0 5560; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5561; SSSE3-NEXT: retq 5562; 5563; SSE41-LABEL: ugt_14_v8i16: 5564; SSE41: # %bb.0: 5565; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5566; SSE41-NEXT: movdqa %xmm0, %xmm2 5567; SSE41-NEXT: pand %xmm1, %xmm2 5568; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5569; SSE41-NEXT: movdqa %xmm3, %xmm4 5570; SSE41-NEXT: pshufb %xmm2, %xmm4 5571; SSE41-NEXT: psrlw $4, %xmm0 5572; SSE41-NEXT: pand 
%xmm1, %xmm0 5573; SSE41-NEXT: pshufb %xmm0, %xmm3 5574; SSE41-NEXT: paddb %xmm4, %xmm3 5575; SSE41-NEXT: movdqa %xmm3, %xmm0 5576; SSE41-NEXT: psllw $8, %xmm0 5577; SSE41-NEXT: paddb %xmm3, %xmm0 5578; SSE41-NEXT: psrlw $8, %xmm0 5579; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 5580; SSE41-NEXT: retq 5581; 5582; AVX1-LABEL: ugt_14_v8i16: 5583; AVX1: # %bb.0: 5584; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5585; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 5586; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5587; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5588; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 5589; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 5590; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5591; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5592; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 5593; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5594; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 5595; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5596; AVX1-NEXT: retq 5597; 5598; AVX2-LABEL: ugt_14_v8i16: 5599; AVX2: # %bb.0: 5600; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5601; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 5602; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5603; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5604; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 5605; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 5606; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5607; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5608; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 5609; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5610; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 5611; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5612; AVX2-NEXT: retq 5613; 5614; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i16: 5615; AVX512VPOPCNTDQ: # %bb.0: 5616; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5617; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 5618; AVX512VPOPCNTDQ-NEXT: vpmovdw 
%zmm0, %ymm0 5619; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5620; AVX512VPOPCNTDQ-NEXT: vzeroupper 5621; AVX512VPOPCNTDQ-NEXT: retq 5622; 5623; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i16: 5624; AVX512VPOPCNTDQVL: # %bb.0: 5625; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5626; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 5627; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 5628; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5629; AVX512VPOPCNTDQVL-NEXT: vzeroupper 5630; AVX512VPOPCNTDQVL-NEXT: retq 5631; 5632; BITALG_NOVLX-LABEL: ugt_14_v8i16: 5633; BITALG_NOVLX: # %bb.0: 5634; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5635; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 5636; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 5637; BITALG_NOVLX-NEXT: vzeroupper 5638; BITALG_NOVLX-NEXT: retq 5639; 5640; BITALG-LABEL: ugt_14_v8i16: 5641; BITALG: # %bb.0: 5642; BITALG-NEXT: vpopcntw %xmm0, %xmm0 5643; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 5644; BITALG-NEXT: vpmovm2w %k0, %xmm0 5645; BITALG-NEXT: retq 5646 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 5647 %3 = icmp ugt <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14> 5648 %4 = sext <8 x i1> %3 to <8 x i16> 5649 ret <8 x i16> %4 5650} 5651 5652define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { 5653; SSE2-LABEL: ult_15_v8i16: 5654; SSE2: # %bb.0: 5655; SSE2-NEXT: movdqa %xmm0, %xmm1 5656; SSE2-NEXT: psrlw $1, %xmm1 5657; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5658; SSE2-NEXT: psubb %xmm1, %xmm0 5659; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5660; SSE2-NEXT: movdqa %xmm0, %xmm2 5661; SSE2-NEXT: pand %xmm1, %xmm2 5662; SSE2-NEXT: psrlw $2, %xmm0 5663; SSE2-NEXT: pand %xmm1, %xmm0 5664; SSE2-NEXT: paddb %xmm2, %xmm0 5665; SSE2-NEXT: movdqa %xmm0, %xmm1 5666; SSE2-NEXT: psrlw $4, %xmm1 
5667; SSE2-NEXT: paddb %xmm0, %xmm1 5668; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5669; SSE2-NEXT: movdqa %xmm1, %xmm2 5670; SSE2-NEXT: psllw $8, %xmm2 5671; SSE2-NEXT: paddb %xmm1, %xmm2 5672; SSE2-NEXT: psrlw $8, %xmm2 5673; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] 5674; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 5675; SSE2-NEXT: retq 5676; 5677; SSE3-LABEL: ult_15_v8i16: 5678; SSE3: # %bb.0: 5679; SSE3-NEXT: movdqa %xmm0, %xmm1 5680; SSE3-NEXT: psrlw $1, %xmm1 5681; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5682; SSE3-NEXT: psubb %xmm1, %xmm0 5683; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5684; SSE3-NEXT: movdqa %xmm0, %xmm2 5685; SSE3-NEXT: pand %xmm1, %xmm2 5686; SSE3-NEXT: psrlw $2, %xmm0 5687; SSE3-NEXT: pand %xmm1, %xmm0 5688; SSE3-NEXT: paddb %xmm2, %xmm0 5689; SSE3-NEXT: movdqa %xmm0, %xmm1 5690; SSE3-NEXT: psrlw $4, %xmm1 5691; SSE3-NEXT: paddb %xmm0, %xmm1 5692; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5693; SSE3-NEXT: movdqa %xmm1, %xmm2 5694; SSE3-NEXT: psllw $8, %xmm2 5695; SSE3-NEXT: paddb %xmm1, %xmm2 5696; SSE3-NEXT: psrlw $8, %xmm2 5697; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] 5698; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 5699; SSE3-NEXT: retq 5700; 5701; SSSE3-LABEL: ult_15_v8i16: 5702; SSSE3: # %bb.0: 5703; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5704; SSSE3-NEXT: movdqa %xmm0, %xmm2 5705; SSSE3-NEXT: pand %xmm1, %xmm2 5706; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5707; SSSE3-NEXT: movdqa %xmm3, %xmm4 5708; SSSE3-NEXT: pshufb %xmm2, %xmm4 5709; SSSE3-NEXT: psrlw $4, %xmm0 5710; SSSE3-NEXT: pand %xmm1, %xmm0 5711; SSSE3-NEXT: pshufb %xmm0, %xmm3 5712; SSSE3-NEXT: paddb %xmm4, %xmm3 5713; SSSE3-NEXT: movdqa %xmm3, %xmm1 5714; SSSE3-NEXT: psllw $8, %xmm1 5715; SSSE3-NEXT: paddb %xmm3, %xmm1 5716; SSSE3-NEXT: psrlw $8, %xmm1 5717; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] 5718; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 
5719; SSSE3-NEXT: retq 5720; 5721; SSE41-LABEL: ult_15_v8i16: 5722; SSE41: # %bb.0: 5723; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5724; SSE41-NEXT: movdqa %xmm0, %xmm2 5725; SSE41-NEXT: pand %xmm1, %xmm2 5726; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5727; SSE41-NEXT: movdqa %xmm3, %xmm4 5728; SSE41-NEXT: pshufb %xmm2, %xmm4 5729; SSE41-NEXT: psrlw $4, %xmm0 5730; SSE41-NEXT: pand %xmm1, %xmm0 5731; SSE41-NEXT: pshufb %xmm0, %xmm3 5732; SSE41-NEXT: paddb %xmm4, %xmm3 5733; SSE41-NEXT: movdqa %xmm3, %xmm1 5734; SSE41-NEXT: psllw $8, %xmm1 5735; SSE41-NEXT: paddb %xmm3, %xmm1 5736; SSE41-NEXT: psrlw $8, %xmm1 5737; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] 5738; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 5739; SSE41-NEXT: retq 5740; 5741; AVX1-LABEL: ult_15_v8i16: 5742; AVX1: # %bb.0: 5743; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5744; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 5745; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5746; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5747; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 5748; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 5749; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 5750; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5751; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 5752; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5753; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 5754; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] 5755; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5756; AVX1-NEXT: retq 5757; 5758; AVX2-LABEL: ult_15_v8i16: 5759; AVX2: # %bb.0: 5760; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 5761; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 5762; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 5763; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 5764; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 5765; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 5766; AVX2-NEXT: vpshufb %xmm0, %xmm3, 
%xmm0 5767; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5768; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 5769; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 5770; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 5771; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] 5772; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5773; AVX2-NEXT: retq 5774; 5775; AVX512VPOPCNTDQ-LABEL: ult_15_v8i16: 5776; AVX512VPOPCNTDQ: # %bb.0: 5777; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5778; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 5779; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 5780; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] 5781; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5782; AVX512VPOPCNTDQ-NEXT: vzeroupper 5783; AVX512VPOPCNTDQ-NEXT: retq 5784; 5785; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i16: 5786; AVX512VPOPCNTDQVL: # %bb.0: 5787; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5788; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 5789; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 5790; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] 5791; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5792; AVX512VPOPCNTDQVL-NEXT: vzeroupper 5793; AVX512VPOPCNTDQVL-NEXT: retq 5794; 5795; BITALG_NOVLX-LABEL: ult_15_v8i16: 5796; BITALG_NOVLX: # %bb.0: 5797; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5798; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 5799; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] 5800; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 5801; BITALG_NOVLX-NEXT: vzeroupper 5802; BITALG_NOVLX-NEXT: retq 5803; 5804; BITALG-LABEL: ult_15_v8i16: 5805; BITALG: # %bb.0: 5806; BITALG-NEXT: vpopcntw %xmm0, %xmm0 5807; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 5808; BITALG-NEXT: vpmovm2w %k0, %xmm0 
5809; BITALG-NEXT: retq 5810 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) 5811 %3 = icmp ult <8 x i16> %2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 5812 %4 = sext <8 x i1> %3 to <8 x i16> 5813 ret <8 x i16> %4 5814} 5815 5816define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) { 5817; SSE-LABEL: ugt_1_v4i32: 5818; SSE: # %bb.0: 5819; SSE-NEXT: pcmpeqd %xmm2, %xmm2 5820; SSE-NEXT: movdqa %xmm0, %xmm1 5821; SSE-NEXT: paddd %xmm2, %xmm1 5822; SSE-NEXT: pand %xmm0, %xmm1 5823; SSE-NEXT: pxor %xmm0, %xmm0 5824; SSE-NEXT: pcmpeqd %xmm0, %xmm1 5825; SSE-NEXT: pxor %xmm2, %xmm1 5826; SSE-NEXT: movdqa %xmm1, %xmm0 5827; SSE-NEXT: retq 5828; 5829; AVX1-LABEL: ugt_1_v4i32: 5830; AVX1: # %bb.0: 5831; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5832; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2 5833; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 5834; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 5835; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 5836; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 5837; AVX1-NEXT: retq 5838; 5839; AVX2-LABEL: ugt_1_v4i32: 5840; AVX2: # %bb.0: 5841; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5842; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm2 5843; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 5844; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 5845; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 5846; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 5847; AVX2-NEXT: retq 5848; 5849; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i32: 5850; AVX512VPOPCNTDQ: # %bb.0: 5851; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5852; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 5853; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] 5854; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 5855; AVX512VPOPCNTDQ-NEXT: vzeroupper 5856; AVX512VPOPCNTDQ-NEXT: retq 5857; 5858; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i32: 5859; AVX512VPOPCNTDQVL: # %bb.0: 5860; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 5861; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 5862; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, 
%xmm0, %xmm0 5863; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 5864; AVX512VPOPCNTDQVL-NEXT: retq 5865; 5866; BITALG_NOVLX-LABEL: ugt_1_v4i32: 5867; BITALG_NOVLX: # %bb.0: 5868; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5869; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 5870; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 5871; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 5872; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 5873; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 5874; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 5875; BITALG_NOVLX-NEXT: vzeroupper 5876; BITALG_NOVLX-NEXT: retq 5877; 5878; BITALG-LABEL: ugt_1_v4i32: 5879; BITALG: # %bb.0: 5880; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5881; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1 5882; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 5883; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 5884; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 5885; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 5886; BITALG-NEXT: retq 5887 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 5888 %3 = icmp ugt <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1> 5889 %4 = sext <4 x i1> %3 to <4 x i32> 5890 ret <4 x i32> %4 5891} 5892 5893define <4 x i32> @ult_2_v4i32(<4 x i32> %0) { 5894; SSE-LABEL: ult_2_v4i32: 5895; SSE: # %bb.0: 5896; SSE-NEXT: pcmpeqd %xmm1, %xmm1 5897; SSE-NEXT: paddd %xmm0, %xmm1 5898; SSE-NEXT: pand %xmm1, %xmm0 5899; SSE-NEXT: pxor %xmm1, %xmm1 5900; SSE-NEXT: pcmpeqd %xmm1, %xmm0 5901; SSE-NEXT: retq 5902; 5903; AVX1-LABEL: ult_2_v4i32: 5904; AVX1: # %bb.0: 5905; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5906; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 5907; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 5908; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 5909; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 5910; AVX1-NEXT: retq 5911; 5912; AVX2-LABEL: ult_2_v4i32: 5913; AVX2: # %bb.0: 5914; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5915; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 5916; AVX2-NEXT: vpand %xmm1, %xmm0, 
%xmm0 5917; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 5918; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 5919; AVX2-NEXT: retq 5920; 5921; AVX512VPOPCNTDQ-LABEL: ult_2_v4i32: 5922; AVX512VPOPCNTDQ: # %bb.0: 5923; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5924; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 5925; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] 5926; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 5927; AVX512VPOPCNTDQ-NEXT: vzeroupper 5928; AVX512VPOPCNTDQ-NEXT: retq 5929; 5930; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i32: 5931; AVX512VPOPCNTDQVL: # %bb.0: 5932; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 5933; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 5934; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 5935; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 5936; AVX512VPOPCNTDQVL-NEXT: retq 5937; 5938; BITALG_NOVLX-LABEL: ult_2_v4i32: 5939; BITALG_NOVLX: # %bb.0: 5940; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5941; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 5942; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 5943; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 5944; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 5945; BITALG_NOVLX-NEXT: retq 5946; 5947; BITALG-LABEL: ult_2_v4i32: 5948; BITALG: # %bb.0: 5949; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 5950; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1 5951; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 5952; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 5953; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 5954; BITALG-NEXT: retq 5955 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 5956 %3 = icmp ult <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2> 5957 %4 = sext <4 x i1> %3 to <4 x i32> 5958 ret <4 x i32> %4 5959} 5960 5961define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { 5962; SSE2-LABEL: ugt_2_v4i32: 5963; SSE2: # %bb.0: 5964; SSE2-NEXT: movdqa %xmm0, %xmm1 5965; SSE2-NEXT: psrlw $1, %xmm1 5966; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5967; SSE2-NEXT: psubb 
%xmm1, %xmm0 5968; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5969; SSE2-NEXT: movdqa %xmm0, %xmm2 5970; SSE2-NEXT: pand %xmm1, %xmm2 5971; SSE2-NEXT: psrlw $2, %xmm0 5972; SSE2-NEXT: pand %xmm1, %xmm0 5973; SSE2-NEXT: paddb %xmm2, %xmm0 5974; SSE2-NEXT: movdqa %xmm0, %xmm1 5975; SSE2-NEXT: psrlw $4, %xmm1 5976; SSE2-NEXT: paddb %xmm0, %xmm1 5977; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 5978; SSE2-NEXT: pxor %xmm0, %xmm0 5979; SSE2-NEXT: movdqa %xmm1, %xmm2 5980; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 5981; SSE2-NEXT: psadbw %xmm0, %xmm2 5982; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 5983; SSE2-NEXT: psadbw %xmm0, %xmm1 5984; SSE2-NEXT: packuswb %xmm2, %xmm1 5985; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 5986; SSE2-NEXT: movdqa %xmm1, %xmm0 5987; SSE2-NEXT: retq 5988; 5989; SSE3-LABEL: ugt_2_v4i32: 5990; SSE3: # %bb.0: 5991; SSE3-NEXT: movdqa %xmm0, %xmm1 5992; SSE3-NEXT: psrlw $1, %xmm1 5993; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 5994; SSE3-NEXT: psubb %xmm1, %xmm0 5995; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 5996; SSE3-NEXT: movdqa %xmm0, %xmm2 5997; SSE3-NEXT: pand %xmm1, %xmm2 5998; SSE3-NEXT: psrlw $2, %xmm0 5999; SSE3-NEXT: pand %xmm1, %xmm0 6000; SSE3-NEXT: paddb %xmm2, %xmm0 6001; SSE3-NEXT: movdqa %xmm0, %xmm1 6002; SSE3-NEXT: psrlw $4, %xmm1 6003; SSE3-NEXT: paddb %xmm0, %xmm1 6004; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6005; SSE3-NEXT: pxor %xmm0, %xmm0 6006; SSE3-NEXT: movdqa %xmm1, %xmm2 6007; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6008; SSE3-NEXT: psadbw %xmm0, %xmm2 6009; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6010; SSE3-NEXT: psadbw %xmm0, %xmm1 6011; SSE3-NEXT: packuswb %xmm2, %xmm1 6012; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6013; SSE3-NEXT: movdqa %xmm1, %xmm0 6014; SSE3-NEXT: retq 6015; 6016; SSSE3-LABEL: ugt_2_v4i32: 6017; SSSE3: # %bb.0: 6018; 
SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6019; SSSE3-NEXT: movdqa %xmm0, %xmm3 6020; SSSE3-NEXT: pand %xmm2, %xmm3 6021; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6022; SSSE3-NEXT: movdqa %xmm1, %xmm4 6023; SSSE3-NEXT: pshufb %xmm3, %xmm4 6024; SSSE3-NEXT: psrlw $4, %xmm0 6025; SSSE3-NEXT: pand %xmm2, %xmm0 6026; SSSE3-NEXT: pshufb %xmm0, %xmm1 6027; SSSE3-NEXT: paddb %xmm4, %xmm1 6028; SSSE3-NEXT: pxor %xmm0, %xmm0 6029; SSSE3-NEXT: movdqa %xmm1, %xmm2 6030; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6031; SSSE3-NEXT: psadbw %xmm0, %xmm2 6032; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6033; SSSE3-NEXT: psadbw %xmm0, %xmm1 6034; SSSE3-NEXT: packuswb %xmm2, %xmm1 6035; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6036; SSSE3-NEXT: movdqa %xmm1, %xmm0 6037; SSSE3-NEXT: retq 6038; 6039; SSE41-LABEL: ugt_2_v4i32: 6040; SSE41: # %bb.0: 6041; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6042; SSE41-NEXT: movdqa %xmm0, %xmm2 6043; SSE41-NEXT: pand %xmm1, %xmm2 6044; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6045; SSE41-NEXT: movdqa %xmm3, %xmm4 6046; SSE41-NEXT: pshufb %xmm2, %xmm4 6047; SSE41-NEXT: psrlw $4, %xmm0 6048; SSE41-NEXT: pand %xmm1, %xmm0 6049; SSE41-NEXT: pshufb %xmm0, %xmm3 6050; SSE41-NEXT: paddb %xmm4, %xmm3 6051; SSE41-NEXT: pxor %xmm1, %xmm1 6052; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 6053; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 6054; SSE41-NEXT: psadbw %xmm1, %xmm3 6055; SSE41-NEXT: psadbw %xmm1, %xmm0 6056; SSE41-NEXT: packuswb %xmm3, %xmm0 6057; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 6058; SSE41-NEXT: retq 6059; 6060; AVX1-LABEL: ugt_2_v4i32: 6061; AVX1: # %bb.0: 6062; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6063; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 6064; AVX1-NEXT: 
vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6065; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6066; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 6067; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 6068; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6069; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6070; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 6071; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6072; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6073; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6074; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6075; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6076; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 6077; AVX1-NEXT: retq 6078; 6079; AVX2-LABEL: ugt_2_v4i32: 6080; AVX2: # %bb.0: 6081; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6082; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 6083; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6084; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6085; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 6086; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 6087; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6088; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6089; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 6090; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6091; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6092; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6093; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6094; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6095; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] 6096; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6097; AVX2-NEXT: retq 6098; 6099; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i32: 6100; AVX512VPOPCNTDQ: # %bb.0: 6101; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6102; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 6103; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] 6104; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6105; AVX512VPOPCNTDQ-NEXT: vzeroupper 6106; AVX512VPOPCNTDQ-NEXT: 
retq 6107; 6108; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i32: 6109; AVX512VPOPCNTDQVL: # %bb.0: 6110; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 6111; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 6112; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6113; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6114; AVX512VPOPCNTDQVL-NEXT: retq 6115; 6116; BITALG_NOVLX-LABEL: ugt_2_v4i32: 6117; BITALG_NOVLX: # %bb.0: 6118; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6119; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 6120; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 6121; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6122; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6123; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6124; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6125; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6126; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] 6127; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6128; BITALG_NOVLX-NEXT: vzeroupper 6129; BITALG_NOVLX-NEXT: retq 6130; 6131; BITALG-LABEL: ugt_2_v4i32: 6132; BITALG: # %bb.0: 6133; BITALG-NEXT: vpopcntb %xmm0, %xmm0 6134; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 6135; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6136; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6137; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6138; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6139; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6140; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 6141; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6142; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6143; BITALG-NEXT: retq 6144 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 6145 %3 = icmp ugt <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2> 6146 %4 = sext <4 x i1> %3 to <4 x i32> 6147 ret <4 x i32> %4 6148} 6149 6150define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { 6151; 
SSE2-LABEL: ult_3_v4i32: 6152; SSE2: # %bb.0: 6153; SSE2-NEXT: movdqa %xmm0, %xmm1 6154; SSE2-NEXT: psrlw $1, %xmm1 6155; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6156; SSE2-NEXT: psubb %xmm1, %xmm0 6157; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6158; SSE2-NEXT: movdqa %xmm0, %xmm2 6159; SSE2-NEXT: pand %xmm1, %xmm2 6160; SSE2-NEXT: psrlw $2, %xmm0 6161; SSE2-NEXT: pand %xmm1, %xmm0 6162; SSE2-NEXT: paddb %xmm2, %xmm0 6163; SSE2-NEXT: movdqa %xmm0, %xmm1 6164; SSE2-NEXT: psrlw $4, %xmm1 6165; SSE2-NEXT: paddb %xmm0, %xmm1 6166; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6167; SSE2-NEXT: pxor %xmm0, %xmm0 6168; SSE2-NEXT: movdqa %xmm1, %xmm2 6169; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6170; SSE2-NEXT: psadbw %xmm0, %xmm2 6171; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6172; SSE2-NEXT: psadbw %xmm0, %xmm1 6173; SSE2-NEXT: packuswb %xmm2, %xmm1 6174; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] 6175; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 6176; SSE2-NEXT: retq 6177; 6178; SSE3-LABEL: ult_3_v4i32: 6179; SSE3: # %bb.0: 6180; SSE3-NEXT: movdqa %xmm0, %xmm1 6181; SSE3-NEXT: psrlw $1, %xmm1 6182; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6183; SSE3-NEXT: psubb %xmm1, %xmm0 6184; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6185; SSE3-NEXT: movdqa %xmm0, %xmm2 6186; SSE3-NEXT: pand %xmm1, %xmm2 6187; SSE3-NEXT: psrlw $2, %xmm0 6188; SSE3-NEXT: pand %xmm1, %xmm0 6189; SSE3-NEXT: paddb %xmm2, %xmm0 6190; SSE3-NEXT: movdqa %xmm0, %xmm1 6191; SSE3-NEXT: psrlw $4, %xmm1 6192; SSE3-NEXT: paddb %xmm0, %xmm1 6193; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6194; SSE3-NEXT: pxor %xmm0, %xmm0 6195; SSE3-NEXT: movdqa %xmm1, %xmm2 6196; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6197; SSE3-NEXT: psadbw %xmm0, %xmm2 6198; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6199; SSE3-NEXT: psadbw %xmm0, %xmm1 6200; SSE3-NEXT: packuswb 
%xmm2, %xmm1 6201; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] 6202; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 6203; SSE3-NEXT: retq 6204; 6205; SSSE3-LABEL: ult_3_v4i32: 6206; SSSE3: # %bb.0: 6207; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6208; SSSE3-NEXT: movdqa %xmm0, %xmm2 6209; SSSE3-NEXT: pand %xmm1, %xmm2 6210; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6211; SSSE3-NEXT: movdqa %xmm3, %xmm4 6212; SSSE3-NEXT: pshufb %xmm2, %xmm4 6213; SSSE3-NEXT: psrlw $4, %xmm0 6214; SSSE3-NEXT: pand %xmm1, %xmm0 6215; SSSE3-NEXT: pshufb %xmm0, %xmm3 6216; SSSE3-NEXT: paddb %xmm4, %xmm3 6217; SSSE3-NEXT: pxor %xmm0, %xmm0 6218; SSSE3-NEXT: movdqa %xmm3, %xmm1 6219; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 6220; SSSE3-NEXT: psadbw %xmm0, %xmm1 6221; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 6222; SSSE3-NEXT: psadbw %xmm0, %xmm3 6223; SSSE3-NEXT: packuswb %xmm1, %xmm3 6224; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] 6225; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 6226; SSSE3-NEXT: retq 6227; 6228; SSE41-LABEL: ult_3_v4i32: 6229; SSE41: # %bb.0: 6230; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6231; SSE41-NEXT: movdqa %xmm0, %xmm2 6232; SSE41-NEXT: pand %xmm1, %xmm2 6233; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6234; SSE41-NEXT: movdqa %xmm3, %xmm4 6235; SSE41-NEXT: pshufb %xmm2, %xmm4 6236; SSE41-NEXT: psrlw $4, %xmm0 6237; SSE41-NEXT: pand %xmm1, %xmm0 6238; SSE41-NEXT: pshufb %xmm0, %xmm3 6239; SSE41-NEXT: paddb %xmm4, %xmm3 6240; SSE41-NEXT: pxor %xmm0, %xmm0 6241; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 6242; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 6243; SSE41-NEXT: psadbw %xmm0, %xmm3 6244; SSE41-NEXT: psadbw %xmm0, %xmm1 6245; SSE41-NEXT: packuswb %xmm3, %xmm1 6246; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] 6247; SSE41-NEXT: pcmpgtd 
%xmm1, %xmm0 6248; SSE41-NEXT: retq 6249; 6250; AVX1-LABEL: ult_3_v4i32: 6251; AVX1: # %bb.0: 6252; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6253; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 6254; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6255; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6256; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 6257; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 6258; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6259; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6260; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 6261; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6262; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6263; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6264; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6265; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6266; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3] 6267; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6268; AVX1-NEXT: retq 6269; 6270; AVX2-LABEL: ult_3_v4i32: 6271; AVX2: # %bb.0: 6272; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6273; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 6274; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6275; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6276; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 6277; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 6278; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6279; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6280; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 6281; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6282; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6283; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6284; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6285; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6286; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] 6287; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6288; AVX2-NEXT: retq 6289; 6290; AVX512VPOPCNTDQ-LABEL: ult_3_v4i32: 6291; AVX512VPOPCNTDQ: # %bb.0: 6292; 
AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6293; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 6294; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] 6295; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6296; AVX512VPOPCNTDQ-NEXT: vzeroupper 6297; AVX512VPOPCNTDQ-NEXT: retq 6298; 6299; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i32: 6300; AVX512VPOPCNTDQVL: # %bb.0: 6301; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 6302; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 6303; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6304; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6305; AVX512VPOPCNTDQVL-NEXT: retq 6306; 6307; BITALG_NOVLX-LABEL: ult_3_v4i32: 6308; BITALG_NOVLX: # %bb.0: 6309; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6310; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 6311; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 6312; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6313; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6314; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6315; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6316; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6317; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] 6318; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6319; BITALG_NOVLX-NEXT: vzeroupper 6320; BITALG_NOVLX-NEXT: retq 6321; 6322; BITALG-LABEL: ult_3_v4i32: 6323; BITALG: # %bb.0: 6324; BITALG-NEXT: vpopcntb %xmm0, %xmm0 6325; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 6326; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6327; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6328; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6329; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6330; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6331; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 6332; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6333; BITALG-NEXT: vmovdqa32 
%xmm0, %xmm0 {%k1} {z} 6334; BITALG-NEXT: retq 6335 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 6336 %3 = icmp ult <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3> 6337 %4 = sext <4 x i1> %3 to <4 x i32> 6338 ret <4 x i32> %4 6339} 6340 6341define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { 6342; SSE2-LABEL: ugt_3_v4i32: 6343; SSE2: # %bb.0: 6344; SSE2-NEXT: movdqa %xmm0, %xmm1 6345; SSE2-NEXT: psrlw $1, %xmm1 6346; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6347; SSE2-NEXT: psubb %xmm1, %xmm0 6348; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6349; SSE2-NEXT: movdqa %xmm0, %xmm2 6350; SSE2-NEXT: pand %xmm1, %xmm2 6351; SSE2-NEXT: psrlw $2, %xmm0 6352; SSE2-NEXT: pand %xmm1, %xmm0 6353; SSE2-NEXT: paddb %xmm2, %xmm0 6354; SSE2-NEXT: movdqa %xmm0, %xmm1 6355; SSE2-NEXT: psrlw $4, %xmm1 6356; SSE2-NEXT: paddb %xmm0, %xmm1 6357; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6358; SSE2-NEXT: pxor %xmm0, %xmm0 6359; SSE2-NEXT: movdqa %xmm1, %xmm2 6360; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6361; SSE2-NEXT: psadbw %xmm0, %xmm2 6362; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6363; SSE2-NEXT: psadbw %xmm0, %xmm1 6364; SSE2-NEXT: packuswb %xmm2, %xmm1 6365; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6366; SSE2-NEXT: movdqa %xmm1, %xmm0 6367; SSE2-NEXT: retq 6368; 6369; SSE3-LABEL: ugt_3_v4i32: 6370; SSE3: # %bb.0: 6371; SSE3-NEXT: movdqa %xmm0, %xmm1 6372; SSE3-NEXT: psrlw $1, %xmm1 6373; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6374; SSE3-NEXT: psubb %xmm1, %xmm0 6375; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6376; SSE3-NEXT: movdqa %xmm0, %xmm2 6377; SSE3-NEXT: pand %xmm1, %xmm2 6378; SSE3-NEXT: psrlw $2, %xmm0 6379; SSE3-NEXT: pand %xmm1, %xmm0 6380; SSE3-NEXT: paddb %xmm2, %xmm0 6381; SSE3-NEXT: movdqa %xmm0, %xmm1 6382; SSE3-NEXT: psrlw $4, %xmm1 6383; SSE3-NEXT: paddb %xmm0, %xmm1 6384; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6385; SSE3-NEXT: pxor %xmm0, 
%xmm0 6386; SSE3-NEXT: movdqa %xmm1, %xmm2 6387; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6388; SSE3-NEXT: psadbw %xmm0, %xmm2 6389; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6390; SSE3-NEXT: psadbw %xmm0, %xmm1 6391; SSE3-NEXT: packuswb %xmm2, %xmm1 6392; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6393; SSE3-NEXT: movdqa %xmm1, %xmm0 6394; SSE3-NEXT: retq 6395; 6396; SSSE3-LABEL: ugt_3_v4i32: 6397; SSSE3: # %bb.0: 6398; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6399; SSSE3-NEXT: movdqa %xmm0, %xmm3 6400; SSSE3-NEXT: pand %xmm2, %xmm3 6401; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6402; SSSE3-NEXT: movdqa %xmm1, %xmm4 6403; SSSE3-NEXT: pshufb %xmm3, %xmm4 6404; SSSE3-NEXT: psrlw $4, %xmm0 6405; SSSE3-NEXT: pand %xmm2, %xmm0 6406; SSSE3-NEXT: pshufb %xmm0, %xmm1 6407; SSSE3-NEXT: paddb %xmm4, %xmm1 6408; SSSE3-NEXT: pxor %xmm0, %xmm0 6409; SSSE3-NEXT: movdqa %xmm1, %xmm2 6410; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6411; SSSE3-NEXT: psadbw %xmm0, %xmm2 6412; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6413; SSSE3-NEXT: psadbw %xmm0, %xmm1 6414; SSSE3-NEXT: packuswb %xmm2, %xmm1 6415; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6416; SSSE3-NEXT: movdqa %xmm1, %xmm0 6417; SSSE3-NEXT: retq 6418; 6419; SSE41-LABEL: ugt_3_v4i32: 6420; SSE41: # %bb.0: 6421; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6422; SSE41-NEXT: movdqa %xmm0, %xmm2 6423; SSE41-NEXT: pand %xmm1, %xmm2 6424; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6425; SSE41-NEXT: movdqa %xmm3, %xmm4 6426; SSE41-NEXT: pshufb %xmm2, %xmm4 6427; SSE41-NEXT: psrlw $4, %xmm0 6428; SSE41-NEXT: pand %xmm1, %xmm0 6429; SSE41-NEXT: pshufb %xmm0, %xmm3 6430; SSE41-NEXT: paddb %xmm4, %xmm3 6431; SSE41-NEXT: pxor %xmm1, %xmm1 6432; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = 
xmm3[0],zero,xmm3[1],zero 6433; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 6434; SSE41-NEXT: psadbw %xmm1, %xmm3 6435; SSE41-NEXT: psadbw %xmm1, %xmm0 6436; SSE41-NEXT: packuswb %xmm3, %xmm0 6437; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 6438; SSE41-NEXT: retq 6439; 6440; AVX1-LABEL: ugt_3_v4i32: 6441; AVX1: # %bb.0: 6442; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6443; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 6444; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6445; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6446; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 6447; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 6448; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6449; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6450; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 6451; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6452; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6453; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6454; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6455; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6456; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 6457; AVX1-NEXT: retq 6458; 6459; AVX2-LABEL: ugt_3_v4i32: 6460; AVX2: # %bb.0: 6461; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6462; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 6463; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6464; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6465; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 6466; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 6467; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6468; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6469; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 6470; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6471; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6472; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6473; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6474; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6475; AVX2-NEXT: 
vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] 6476; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6477; AVX2-NEXT: retq 6478; 6479; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i32: 6480; AVX512VPOPCNTDQ: # %bb.0: 6481; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6482; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 6483; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] 6484; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6485; AVX512VPOPCNTDQ-NEXT: vzeroupper 6486; AVX512VPOPCNTDQ-NEXT: retq 6487; 6488; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i32: 6489; AVX512VPOPCNTDQVL: # %bb.0: 6490; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 6491; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 6492; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6493; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6494; AVX512VPOPCNTDQVL-NEXT: retq 6495; 6496; BITALG_NOVLX-LABEL: ugt_3_v4i32: 6497; BITALG_NOVLX: # %bb.0: 6498; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6499; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 6500; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 6501; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6502; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6503; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6504; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6505; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6506; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] 6507; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6508; BITALG_NOVLX-NEXT: vzeroupper 6509; BITALG_NOVLX-NEXT: retq 6510; 6511; BITALG-LABEL: ugt_3_v4i32: 6512; BITALG: # %bb.0: 6513; BITALG-NEXT: vpopcntb %xmm0, %xmm0 6514; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 6515; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6516; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6517; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6518; BITALG-NEXT: vpsadbw %xmm1, %xmm0, 
%xmm0 6519; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6520; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 6521; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6522; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6523; BITALG-NEXT: retq 6524 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 6525 %3 = icmp ugt <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3> 6526 %4 = sext <4 x i1> %3 to <4 x i32> 6527 ret <4 x i32> %4 6528} 6529 6530define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { 6531; SSE2-LABEL: ult_4_v4i32: 6532; SSE2: # %bb.0: 6533; SSE2-NEXT: movdqa %xmm0, %xmm1 6534; SSE2-NEXT: psrlw $1, %xmm1 6535; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6536; SSE2-NEXT: psubb %xmm1, %xmm0 6537; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6538; SSE2-NEXT: movdqa %xmm0, %xmm2 6539; SSE2-NEXT: pand %xmm1, %xmm2 6540; SSE2-NEXT: psrlw $2, %xmm0 6541; SSE2-NEXT: pand %xmm1, %xmm0 6542; SSE2-NEXT: paddb %xmm2, %xmm0 6543; SSE2-NEXT: movdqa %xmm0, %xmm1 6544; SSE2-NEXT: psrlw $4, %xmm1 6545; SSE2-NEXT: paddb %xmm0, %xmm1 6546; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6547; SSE2-NEXT: pxor %xmm0, %xmm0 6548; SSE2-NEXT: movdqa %xmm1, %xmm2 6549; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6550; SSE2-NEXT: psadbw %xmm0, %xmm2 6551; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6552; SSE2-NEXT: psadbw %xmm0, %xmm1 6553; SSE2-NEXT: packuswb %xmm2, %xmm1 6554; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] 6555; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 6556; SSE2-NEXT: retq 6557; 6558; SSE3-LABEL: ult_4_v4i32: 6559; SSE3: # %bb.0: 6560; SSE3-NEXT: movdqa %xmm0, %xmm1 6561; SSE3-NEXT: psrlw $1, %xmm1 6562; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6563; SSE3-NEXT: psubb %xmm1, %xmm0 6564; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6565; SSE3-NEXT: movdqa %xmm0, %xmm2 6566; SSE3-NEXT: pand %xmm1, %xmm2 6567; SSE3-NEXT: psrlw $2, %xmm0 6568; SSE3-NEXT: pand %xmm1, %xmm0 6569; SSE3-NEXT: 
paddb %xmm2, %xmm0 6570; SSE3-NEXT: movdqa %xmm0, %xmm1 6571; SSE3-NEXT: psrlw $4, %xmm1 6572; SSE3-NEXT: paddb %xmm0, %xmm1 6573; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6574; SSE3-NEXT: pxor %xmm0, %xmm0 6575; SSE3-NEXT: movdqa %xmm1, %xmm2 6576; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6577; SSE3-NEXT: psadbw %xmm0, %xmm2 6578; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6579; SSE3-NEXT: psadbw %xmm0, %xmm1 6580; SSE3-NEXT: packuswb %xmm2, %xmm1 6581; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] 6582; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 6583; SSE3-NEXT: retq 6584; 6585; SSSE3-LABEL: ult_4_v4i32: 6586; SSSE3: # %bb.0: 6587; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6588; SSSE3-NEXT: movdqa %xmm0, %xmm2 6589; SSSE3-NEXT: pand %xmm1, %xmm2 6590; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6591; SSSE3-NEXT: movdqa %xmm3, %xmm4 6592; SSSE3-NEXT: pshufb %xmm2, %xmm4 6593; SSSE3-NEXT: psrlw $4, %xmm0 6594; SSSE3-NEXT: pand %xmm1, %xmm0 6595; SSSE3-NEXT: pshufb %xmm0, %xmm3 6596; SSSE3-NEXT: paddb %xmm4, %xmm3 6597; SSSE3-NEXT: pxor %xmm0, %xmm0 6598; SSSE3-NEXT: movdqa %xmm3, %xmm1 6599; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 6600; SSSE3-NEXT: psadbw %xmm0, %xmm1 6601; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 6602; SSSE3-NEXT: psadbw %xmm0, %xmm3 6603; SSSE3-NEXT: packuswb %xmm1, %xmm3 6604; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] 6605; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 6606; SSSE3-NEXT: retq 6607; 6608; SSE41-LABEL: ult_4_v4i32: 6609; SSE41: # %bb.0: 6610; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6611; SSE41-NEXT: movdqa %xmm0, %xmm2 6612; SSE41-NEXT: pand %xmm1, %xmm2 6613; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6614; SSE41-NEXT: movdqa %xmm3, %xmm4 6615; SSE41-NEXT: pshufb %xmm2, %xmm4 6616; SSE41-NEXT: psrlw $4, %xmm0 
6617; SSE41-NEXT: pand %xmm1, %xmm0 6618; SSE41-NEXT: pshufb %xmm0, %xmm3 6619; SSE41-NEXT: paddb %xmm4, %xmm3 6620; SSE41-NEXT: pxor %xmm0, %xmm0 6621; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 6622; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 6623; SSE41-NEXT: psadbw %xmm0, %xmm3 6624; SSE41-NEXT: psadbw %xmm0, %xmm1 6625; SSE41-NEXT: packuswb %xmm3, %xmm1 6626; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] 6627; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 6628; SSE41-NEXT: retq 6629; 6630; AVX1-LABEL: ult_4_v4i32: 6631; AVX1: # %bb.0: 6632; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6633; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 6634; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6635; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6636; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 6637; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 6638; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6639; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6640; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 6641; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6642; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6643; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6644; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6645; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6646; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4] 6647; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6648; AVX1-NEXT: retq 6649; 6650; AVX2-LABEL: ult_4_v4i32: 6651; AVX2: # %bb.0: 6652; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6653; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 6654; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6655; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6656; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 6657; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 6658; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6659; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6660; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 6661; AVX2-NEXT: 
vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6662; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6663; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6664; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6665; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6666; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] 6667; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6668; AVX2-NEXT: retq 6669; 6670; AVX512VPOPCNTDQ-LABEL: ult_4_v4i32: 6671; AVX512VPOPCNTDQ: # %bb.0: 6672; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6673; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 6674; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] 6675; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6676; AVX512VPOPCNTDQ-NEXT: vzeroupper 6677; AVX512VPOPCNTDQ-NEXT: retq 6678; 6679; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i32: 6680; AVX512VPOPCNTDQVL: # %bb.0: 6681; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 6682; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 6683; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6684; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6685; AVX512VPOPCNTDQVL-NEXT: retq 6686; 6687; BITALG_NOVLX-LABEL: ult_4_v4i32: 6688; BITALG_NOVLX: # %bb.0: 6689; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6690; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 6691; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 6692; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6693; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6694; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6695; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6696; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6697; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] 6698; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 6699; BITALG_NOVLX-NEXT: vzeroupper 6700; BITALG_NOVLX-NEXT: retq 6701; 6702; BITALG-LABEL: ult_4_v4i32: 6703; BITALG: # %bb.0: 6704; BITALG-NEXT: vpopcntb %xmm0, %xmm0 
6705; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 6706; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6707; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6708; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6709; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6710; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6711; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 6712; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6713; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6714; BITALG-NEXT: retq 6715 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 6716 %3 = icmp ult <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4> 6717 %4 = sext <4 x i1> %3 to <4 x i32> 6718 ret <4 x i32> %4 6719} 6720 6721define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { 6722; SSE2-LABEL: ugt_4_v4i32: 6723; SSE2: # %bb.0: 6724; SSE2-NEXT: movdqa %xmm0, %xmm1 6725; SSE2-NEXT: psrlw $1, %xmm1 6726; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6727; SSE2-NEXT: psubb %xmm1, %xmm0 6728; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6729; SSE2-NEXT: movdqa %xmm0, %xmm2 6730; SSE2-NEXT: pand %xmm1, %xmm2 6731; SSE2-NEXT: psrlw $2, %xmm0 6732; SSE2-NEXT: pand %xmm1, %xmm0 6733; SSE2-NEXT: paddb %xmm2, %xmm0 6734; SSE2-NEXT: movdqa %xmm0, %xmm1 6735; SSE2-NEXT: psrlw $4, %xmm1 6736; SSE2-NEXT: paddb %xmm0, %xmm1 6737; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6738; SSE2-NEXT: pxor %xmm0, %xmm0 6739; SSE2-NEXT: movdqa %xmm1, %xmm2 6740; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6741; SSE2-NEXT: psadbw %xmm0, %xmm2 6742; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6743; SSE2-NEXT: psadbw %xmm0, %xmm1 6744; SSE2-NEXT: packuswb %xmm2, %xmm1 6745; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6746; SSE2-NEXT: movdqa %xmm1, %xmm0 6747; SSE2-NEXT: retq 6748; 6749; SSE3-LABEL: ugt_4_v4i32: 6750; SSE3: # %bb.0: 6751; SSE3-NEXT: movdqa %xmm0, %xmm1 6752; SSE3-NEXT: psrlw $1, %xmm1 6753; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6754; 
SSE3-NEXT: psubb %xmm1, %xmm0 6755; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6756; SSE3-NEXT: movdqa %xmm0, %xmm2 6757; SSE3-NEXT: pand %xmm1, %xmm2 6758; SSE3-NEXT: psrlw $2, %xmm0 6759; SSE3-NEXT: pand %xmm1, %xmm0 6760; SSE3-NEXT: paddb %xmm2, %xmm0 6761; SSE3-NEXT: movdqa %xmm0, %xmm1 6762; SSE3-NEXT: psrlw $4, %xmm1 6763; SSE3-NEXT: paddb %xmm0, %xmm1 6764; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6765; SSE3-NEXT: pxor %xmm0, %xmm0 6766; SSE3-NEXT: movdqa %xmm1, %xmm2 6767; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6768; SSE3-NEXT: psadbw %xmm0, %xmm2 6769; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6770; SSE3-NEXT: psadbw %xmm0, %xmm1 6771; SSE3-NEXT: packuswb %xmm2, %xmm1 6772; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6773; SSE3-NEXT: movdqa %xmm1, %xmm0 6774; SSE3-NEXT: retq 6775; 6776; SSSE3-LABEL: ugt_4_v4i32: 6777; SSSE3: # %bb.0: 6778; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6779; SSSE3-NEXT: movdqa %xmm0, %xmm3 6780; SSSE3-NEXT: pand %xmm2, %xmm3 6781; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6782; SSSE3-NEXT: movdqa %xmm1, %xmm4 6783; SSSE3-NEXT: pshufb %xmm3, %xmm4 6784; SSSE3-NEXT: psrlw $4, %xmm0 6785; SSSE3-NEXT: pand %xmm2, %xmm0 6786; SSSE3-NEXT: pshufb %xmm0, %xmm1 6787; SSSE3-NEXT: paddb %xmm4, %xmm1 6788; SSSE3-NEXT: pxor %xmm0, %xmm0 6789; SSSE3-NEXT: movdqa %xmm1, %xmm2 6790; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6791; SSSE3-NEXT: psadbw %xmm0, %xmm2 6792; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6793; SSSE3-NEXT: psadbw %xmm0, %xmm1 6794; SSSE3-NEXT: packuswb %xmm2, %xmm1 6795; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 6796; SSSE3-NEXT: movdqa %xmm1, %xmm0 6797; SSSE3-NEXT: retq 6798; 6799; SSE41-LABEL: ugt_4_v4i32: 6800; SSE41: # %bb.0: 6801; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6802; SSE41-NEXT: movdqa %xmm0, %xmm2 6803; SSE41-NEXT: pand %xmm1, %xmm2 6804; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6805; SSE41-NEXT: movdqa %xmm3, %xmm4 6806; SSE41-NEXT: pshufb %xmm2, %xmm4 6807; SSE41-NEXT: psrlw $4, %xmm0 6808; SSE41-NEXT: pand %xmm1, %xmm0 6809; SSE41-NEXT: pshufb %xmm0, %xmm3 6810; SSE41-NEXT: paddb %xmm4, %xmm3 6811; SSE41-NEXT: pxor %xmm1, %xmm1 6812; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 6813; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 6814; SSE41-NEXT: psadbw %xmm1, %xmm3 6815; SSE41-NEXT: psadbw %xmm1, %xmm0 6816; SSE41-NEXT: packuswb %xmm3, %xmm0 6817; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 6818; SSE41-NEXT: retq 6819; 6820; AVX1-LABEL: ugt_4_v4i32: 6821; AVX1: # %bb.0: 6822; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6823; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 6824; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6825; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6826; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 6827; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 6828; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6829; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6830; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 6831; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6832; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6833; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6834; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6835; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6836; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 6837; AVX1-NEXT: retq 6838; 6839; AVX2-LABEL: ugt_4_v4i32: 6840; AVX2: # %bb.0: 6841; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6842; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 6843; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6844; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 6845; AVX2-NEXT: 
vpsrlw $4, %xmm0, %xmm0 6846; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 6847; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 6848; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 6849; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 6850; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6851; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6852; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6853; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6854; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6855; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] 6856; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6857; AVX2-NEXT: retq 6858; 6859; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i32: 6860; AVX512VPOPCNTDQ: # %bb.0: 6861; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6862; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 6863; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] 6864; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6865; AVX512VPOPCNTDQ-NEXT: vzeroupper 6866; AVX512VPOPCNTDQ-NEXT: retq 6867; 6868; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i32: 6869; AVX512VPOPCNTDQVL: # %bb.0: 6870; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 6871; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 6872; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6873; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6874; AVX512VPOPCNTDQVL-NEXT: retq 6875; 6876; BITALG_NOVLX-LABEL: ugt_4_v4i32: 6877; BITALG_NOVLX: # %bb.0: 6878; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6879; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 6880; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 6881; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6882; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6883; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6884; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6885; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6886; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] 6887; 
BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 6888; BITALG_NOVLX-NEXT: vzeroupper 6889; BITALG_NOVLX-NEXT: retq 6890; 6891; BITALG-LABEL: ugt_4_v4i32: 6892; BITALG: # %bb.0: 6893; BITALG-NEXT: vpopcntb %xmm0, %xmm0 6894; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 6895; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 6896; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 6897; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6898; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 6899; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 6900; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 6901; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 6902; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 6903; BITALG-NEXT: retq 6904 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 6905 %3 = icmp ugt <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4> 6906 %4 = sext <4 x i1> %3 to <4 x i32> 6907 ret <4 x i32> %4 6908} 6909 6910define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { 6911; SSE2-LABEL: ult_5_v4i32: 6912; SSE2: # %bb.0: 6913; SSE2-NEXT: movdqa %xmm0, %xmm1 6914; SSE2-NEXT: psrlw $1, %xmm1 6915; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6916; SSE2-NEXT: psubb %xmm1, %xmm0 6917; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6918; SSE2-NEXT: movdqa %xmm0, %xmm2 6919; SSE2-NEXT: pand %xmm1, %xmm2 6920; SSE2-NEXT: psrlw $2, %xmm0 6921; SSE2-NEXT: pand %xmm1, %xmm0 6922; SSE2-NEXT: paddb %xmm2, %xmm0 6923; SSE2-NEXT: movdqa %xmm0, %xmm1 6924; SSE2-NEXT: psrlw $4, %xmm1 6925; SSE2-NEXT: paddb %xmm0, %xmm1 6926; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 6927; SSE2-NEXT: pxor %xmm0, %xmm0 6928; SSE2-NEXT: movdqa %xmm1, %xmm2 6929; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6930; SSE2-NEXT: psadbw %xmm0, %xmm2 6931; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6932; SSE2-NEXT: psadbw %xmm0, %xmm1 6933; SSE2-NEXT: packuswb %xmm2, %xmm1 6934; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] 6935; 
SSE2-NEXT: pcmpgtd %xmm1, %xmm0 6936; SSE2-NEXT: retq 6937; 6938; SSE3-LABEL: ult_5_v4i32: 6939; SSE3: # %bb.0: 6940; SSE3-NEXT: movdqa %xmm0, %xmm1 6941; SSE3-NEXT: psrlw $1, %xmm1 6942; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6943; SSE3-NEXT: psubb %xmm1, %xmm0 6944; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 6945; SSE3-NEXT: movdqa %xmm0, %xmm2 6946; SSE3-NEXT: pand %xmm1, %xmm2 6947; SSE3-NEXT: psrlw $2, %xmm0 6948; SSE3-NEXT: pand %xmm1, %xmm0 6949; SSE3-NEXT: paddb %xmm2, %xmm0 6950; SSE3-NEXT: movdqa %xmm0, %xmm1 6951; SSE3-NEXT: psrlw $4, %xmm1 6952; SSE3-NEXT: paddb %xmm0, %xmm1 6953; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 6954; SSE3-NEXT: pxor %xmm0, %xmm0 6955; SSE3-NEXT: movdqa %xmm1, %xmm2 6956; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 6957; SSE3-NEXT: psadbw %xmm0, %xmm2 6958; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6959; SSE3-NEXT: psadbw %xmm0, %xmm1 6960; SSE3-NEXT: packuswb %xmm2, %xmm1 6961; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] 6962; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 6963; SSE3-NEXT: retq 6964; 6965; SSSE3-LABEL: ult_5_v4i32: 6966; SSSE3: # %bb.0: 6967; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6968; SSSE3-NEXT: movdqa %xmm0, %xmm2 6969; SSSE3-NEXT: pand %xmm1, %xmm2 6970; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6971; SSSE3-NEXT: movdqa %xmm3, %xmm4 6972; SSSE3-NEXT: pshufb %xmm2, %xmm4 6973; SSSE3-NEXT: psrlw $4, %xmm0 6974; SSSE3-NEXT: pand %xmm1, %xmm0 6975; SSSE3-NEXT: pshufb %xmm0, %xmm3 6976; SSSE3-NEXT: paddb %xmm4, %xmm3 6977; SSSE3-NEXT: pxor %xmm0, %xmm0 6978; SSSE3-NEXT: movdqa %xmm3, %xmm1 6979; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 6980; SSSE3-NEXT: psadbw %xmm0, %xmm1 6981; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 6982; SSSE3-NEXT: psadbw %xmm0, %xmm3 6983; SSSE3-NEXT: packuswb %xmm1, %xmm3 6984; SSSE3-NEXT: 
movdqa {{.*#+}} xmm0 = [5,5,5,5] 6985; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 6986; SSSE3-NEXT: retq 6987; 6988; SSE41-LABEL: ult_5_v4i32: 6989; SSE41: # %bb.0: 6990; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 6991; SSE41-NEXT: movdqa %xmm0, %xmm2 6992; SSE41-NEXT: pand %xmm1, %xmm2 6993; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 6994; SSE41-NEXT: movdqa %xmm3, %xmm4 6995; SSE41-NEXT: pshufb %xmm2, %xmm4 6996; SSE41-NEXT: psrlw $4, %xmm0 6997; SSE41-NEXT: pand %xmm1, %xmm0 6998; SSE41-NEXT: pshufb %xmm0, %xmm3 6999; SSE41-NEXT: paddb %xmm4, %xmm3 7000; SSE41-NEXT: pxor %xmm0, %xmm0 7001; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 7002; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 7003; SSE41-NEXT: psadbw %xmm0, %xmm3 7004; SSE41-NEXT: psadbw %xmm0, %xmm1 7005; SSE41-NEXT: packuswb %xmm3, %xmm1 7006; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] 7007; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 7008; SSE41-NEXT: retq 7009; 7010; AVX1-LABEL: ult_5_v4i32: 7011; AVX1: # %bb.0: 7012; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7013; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 7014; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7015; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7016; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 7017; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 7018; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7019; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7020; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 7021; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7022; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7023; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7024; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7025; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7026; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5] 7027; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7028; AVX1-NEXT: retq 7029; 7030; AVX2-LABEL: ult_5_v4i32: 7031; 
AVX2: # %bb.0: 7032; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7033; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 7034; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7035; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7036; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 7037; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 7038; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7039; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7040; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 7041; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7042; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7043; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7044; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7045; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7046; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] 7047; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7048; AVX2-NEXT: retq 7049; 7050; AVX512VPOPCNTDQ-LABEL: ult_5_v4i32: 7051; AVX512VPOPCNTDQ: # %bb.0: 7052; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7053; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 7054; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] 7055; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7056; AVX512VPOPCNTDQ-NEXT: vzeroupper 7057; AVX512VPOPCNTDQ-NEXT: retq 7058; 7059; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i32: 7060; AVX512VPOPCNTDQVL: # %bb.0: 7061; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 7062; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 7063; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7064; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7065; AVX512VPOPCNTDQVL-NEXT: retq 7066; 7067; BITALG_NOVLX-LABEL: ult_5_v4i32: 7068; BITALG_NOVLX: # %bb.0: 7069; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7070; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 7071; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 7072; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7073; BITALG_NOVLX-NEXT: 
vpsadbw %xmm1, %xmm2, %xmm2 7074; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7075; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7076; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7077; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] 7078; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7079; BITALG_NOVLX-NEXT: vzeroupper 7080; BITALG_NOVLX-NEXT: retq 7081; 7082; BITALG-LABEL: ult_5_v4i32: 7083; BITALG: # %bb.0: 7084; BITALG-NEXT: vpopcntb %xmm0, %xmm0 7085; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 7086; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7087; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7088; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7089; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7090; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7091; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 7092; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7093; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7094; BITALG-NEXT: retq 7095 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 7096 %3 = icmp ult <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5> 7097 %4 = sext <4 x i1> %3 to <4 x i32> 7098 ret <4 x i32> %4 7099} 7100 7101define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { 7102; SSE2-LABEL: ugt_5_v4i32: 7103; SSE2: # %bb.0: 7104; SSE2-NEXT: movdqa %xmm0, %xmm1 7105; SSE2-NEXT: psrlw $1, %xmm1 7106; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7107; SSE2-NEXT: psubb %xmm1, %xmm0 7108; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7109; SSE2-NEXT: movdqa %xmm0, %xmm2 7110; SSE2-NEXT: pand %xmm1, %xmm2 7111; SSE2-NEXT: psrlw $2, %xmm0 7112; SSE2-NEXT: pand %xmm1, %xmm0 7113; SSE2-NEXT: paddb %xmm2, %xmm0 7114; SSE2-NEXT: movdqa %xmm0, %xmm1 7115; SSE2-NEXT: psrlw $4, %xmm1 7116; SSE2-NEXT: paddb %xmm0, %xmm1 7117; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7118; SSE2-NEXT: pxor %xmm0, %xmm0 7119; SSE2-NEXT: movdqa %xmm1, %xmm2 7120; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = 
xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7121; SSE2-NEXT: psadbw %xmm0, %xmm2 7122; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7123; SSE2-NEXT: psadbw %xmm0, %xmm1 7124; SSE2-NEXT: packuswb %xmm2, %xmm1 7125; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7126; SSE2-NEXT: movdqa %xmm1, %xmm0 7127; SSE2-NEXT: retq 7128; 7129; SSE3-LABEL: ugt_5_v4i32: 7130; SSE3: # %bb.0: 7131; SSE3-NEXT: movdqa %xmm0, %xmm1 7132; SSE3-NEXT: psrlw $1, %xmm1 7133; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7134; SSE3-NEXT: psubb %xmm1, %xmm0 7135; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7136; SSE3-NEXT: movdqa %xmm0, %xmm2 7137; SSE3-NEXT: pand %xmm1, %xmm2 7138; SSE3-NEXT: psrlw $2, %xmm0 7139; SSE3-NEXT: pand %xmm1, %xmm0 7140; SSE3-NEXT: paddb %xmm2, %xmm0 7141; SSE3-NEXT: movdqa %xmm0, %xmm1 7142; SSE3-NEXT: psrlw $4, %xmm1 7143; SSE3-NEXT: paddb %xmm0, %xmm1 7144; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7145; SSE3-NEXT: pxor %xmm0, %xmm0 7146; SSE3-NEXT: movdqa %xmm1, %xmm2 7147; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7148; SSE3-NEXT: psadbw %xmm0, %xmm2 7149; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7150; SSE3-NEXT: psadbw %xmm0, %xmm1 7151; SSE3-NEXT: packuswb %xmm2, %xmm1 7152; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7153; SSE3-NEXT: movdqa %xmm1, %xmm0 7154; SSE3-NEXT: retq 7155; 7156; SSSE3-LABEL: ugt_5_v4i32: 7157; SSSE3: # %bb.0: 7158; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7159; SSSE3-NEXT: movdqa %xmm0, %xmm3 7160; SSSE3-NEXT: pand %xmm2, %xmm3 7161; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7162; SSSE3-NEXT: movdqa %xmm1, %xmm4 7163; SSSE3-NEXT: pshufb %xmm3, %xmm4 7164; SSSE3-NEXT: psrlw $4, %xmm0 7165; SSSE3-NEXT: pand %xmm2, %xmm0 7166; SSSE3-NEXT: pshufb %xmm0, %xmm1 7167; SSSE3-NEXT: paddb %xmm4, %xmm1 7168; SSSE3-NEXT: pxor %xmm0, %xmm0 7169; SSSE3-NEXT: movdqa %xmm1, %xmm2 7170; SSSE3-NEXT: 
punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7171; SSSE3-NEXT: psadbw %xmm0, %xmm2 7172; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7173; SSSE3-NEXT: psadbw %xmm0, %xmm1 7174; SSSE3-NEXT: packuswb %xmm2, %xmm1 7175; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7176; SSSE3-NEXT: movdqa %xmm1, %xmm0 7177; SSSE3-NEXT: retq 7178; 7179; SSE41-LABEL: ugt_5_v4i32: 7180; SSE41: # %bb.0: 7181; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7182; SSE41-NEXT: movdqa %xmm0, %xmm2 7183; SSE41-NEXT: pand %xmm1, %xmm2 7184; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7185; SSE41-NEXT: movdqa %xmm3, %xmm4 7186; SSE41-NEXT: pshufb %xmm2, %xmm4 7187; SSE41-NEXT: psrlw $4, %xmm0 7188; SSE41-NEXT: pand %xmm1, %xmm0 7189; SSE41-NEXT: pshufb %xmm0, %xmm3 7190; SSE41-NEXT: paddb %xmm4, %xmm3 7191; SSE41-NEXT: pxor %xmm1, %xmm1 7192; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 7193; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 7194; SSE41-NEXT: psadbw %xmm1, %xmm3 7195; SSE41-NEXT: psadbw %xmm1, %xmm0 7196; SSE41-NEXT: packuswb %xmm3, %xmm0 7197; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 7198; SSE41-NEXT: retq 7199; 7200; AVX1-LABEL: ugt_5_v4i32: 7201; AVX1: # %bb.0: 7202; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7203; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 7204; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7205; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7206; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 7207; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 7208; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7209; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7210; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 7211; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7212; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7213; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7214; AVX1-NEXT: vpsadbw %xmm1, 
%xmm0, %xmm0 7215; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7216; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 7217; AVX1-NEXT: retq 7218; 7219; AVX2-LABEL: ugt_5_v4i32: 7220; AVX2: # %bb.0: 7221; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7222; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 7223; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7224; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7225; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 7226; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 7227; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7228; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7229; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 7230; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7231; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7232; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7233; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7234; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7235; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] 7236; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 7237; AVX2-NEXT: retq 7238; 7239; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i32: 7240; AVX512VPOPCNTDQ: # %bb.0: 7241; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7242; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 7243; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] 7244; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 7245; AVX512VPOPCNTDQ-NEXT: vzeroupper 7246; AVX512VPOPCNTDQ-NEXT: retq 7247; 7248; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i32: 7249; AVX512VPOPCNTDQVL: # %bb.0: 7250; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 7251; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 7252; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7253; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7254; AVX512VPOPCNTDQVL-NEXT: retq 7255; 7256; BITALG_NOVLX-LABEL: ugt_5_v4i32: 7257; BITALG_NOVLX: # %bb.0: 7258; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7259; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 7260; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 7261; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7262; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7263; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7264; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7265; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7266; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] 7267; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 7268; BITALG_NOVLX-NEXT: vzeroupper 7269; BITALG_NOVLX-NEXT: retq 7270; 7271; BITALG-LABEL: ugt_5_v4i32: 7272; BITALG: # %bb.0: 7273; BITALG-NEXT: vpopcntb %xmm0, %xmm0 7274; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 7275; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7276; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7277; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7278; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7279; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7280; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 7281; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7282; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7283; BITALG-NEXT: retq 7284 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 7285 %3 = icmp ugt <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5> 7286 %4 = sext <4 x i1> %3 to <4 x i32> 7287 ret <4 x i32> %4 7288} 7289 7290define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { 7291; SSE2-LABEL: ult_6_v4i32: 7292; SSE2: # %bb.0: 7293; SSE2-NEXT: movdqa %xmm0, %xmm1 7294; SSE2-NEXT: psrlw $1, %xmm1 7295; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7296; SSE2-NEXT: psubb %xmm1, %xmm0 7297; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7298; SSE2-NEXT: movdqa %xmm0, %xmm2 7299; SSE2-NEXT: pand %xmm1, %xmm2 7300; SSE2-NEXT: psrlw $2, %xmm0 7301; SSE2-NEXT: pand %xmm1, %xmm0 7302; SSE2-NEXT: paddb %xmm2, %xmm0 7303; SSE2-NEXT: movdqa %xmm0, %xmm1 7304; SSE2-NEXT: psrlw $4, %xmm1 7305; 
SSE2-NEXT: paddb %xmm0, %xmm1 7306; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7307; SSE2-NEXT: pxor %xmm0, %xmm0 7308; SSE2-NEXT: movdqa %xmm1, %xmm2 7309; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7310; SSE2-NEXT: psadbw %xmm0, %xmm2 7311; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7312; SSE2-NEXT: psadbw %xmm0, %xmm1 7313; SSE2-NEXT: packuswb %xmm2, %xmm1 7314; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] 7315; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 7316; SSE2-NEXT: retq 7317; 7318; SSE3-LABEL: ult_6_v4i32: 7319; SSE3: # %bb.0: 7320; SSE3-NEXT: movdqa %xmm0, %xmm1 7321; SSE3-NEXT: psrlw $1, %xmm1 7322; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7323; SSE3-NEXT: psubb %xmm1, %xmm0 7324; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7325; SSE3-NEXT: movdqa %xmm0, %xmm2 7326; SSE3-NEXT: pand %xmm1, %xmm2 7327; SSE3-NEXT: psrlw $2, %xmm0 7328; SSE3-NEXT: pand %xmm1, %xmm0 7329; SSE3-NEXT: paddb %xmm2, %xmm0 7330; SSE3-NEXT: movdqa %xmm0, %xmm1 7331; SSE3-NEXT: psrlw $4, %xmm1 7332; SSE3-NEXT: paddb %xmm0, %xmm1 7333; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7334; SSE3-NEXT: pxor %xmm0, %xmm0 7335; SSE3-NEXT: movdqa %xmm1, %xmm2 7336; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7337; SSE3-NEXT: psadbw %xmm0, %xmm2 7338; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7339; SSE3-NEXT: psadbw %xmm0, %xmm1 7340; SSE3-NEXT: packuswb %xmm2, %xmm1 7341; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] 7342; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 7343; SSE3-NEXT: retq 7344; 7345; SSSE3-LABEL: ult_6_v4i32: 7346; SSSE3: # %bb.0: 7347; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7348; SSSE3-NEXT: movdqa %xmm0, %xmm2 7349; SSSE3-NEXT: pand %xmm1, %xmm2 7350; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7351; SSSE3-NEXT: movdqa %xmm3, %xmm4 7352; SSSE3-NEXT: pshufb %xmm2, %xmm4 7353; SSSE3-NEXT: psrlw $4, %xmm0 7354; 
SSSE3-NEXT: pand %xmm1, %xmm0 7355; SSSE3-NEXT: pshufb %xmm0, %xmm3 7356; SSSE3-NEXT: paddb %xmm4, %xmm3 7357; SSSE3-NEXT: pxor %xmm0, %xmm0 7358; SSSE3-NEXT: movdqa %xmm3, %xmm1 7359; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 7360; SSSE3-NEXT: psadbw %xmm0, %xmm1 7361; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 7362; SSSE3-NEXT: psadbw %xmm0, %xmm3 7363; SSSE3-NEXT: packuswb %xmm1, %xmm3 7364; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] 7365; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 7366; SSSE3-NEXT: retq 7367; 7368; SSE41-LABEL: ult_6_v4i32: 7369; SSE41: # %bb.0: 7370; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7371; SSE41-NEXT: movdqa %xmm0, %xmm2 7372; SSE41-NEXT: pand %xmm1, %xmm2 7373; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7374; SSE41-NEXT: movdqa %xmm3, %xmm4 7375; SSE41-NEXT: pshufb %xmm2, %xmm4 7376; SSE41-NEXT: psrlw $4, %xmm0 7377; SSE41-NEXT: pand %xmm1, %xmm0 7378; SSE41-NEXT: pshufb %xmm0, %xmm3 7379; SSE41-NEXT: paddb %xmm4, %xmm3 7380; SSE41-NEXT: pxor %xmm0, %xmm0 7381; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 7382; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 7383; SSE41-NEXT: psadbw %xmm0, %xmm3 7384; SSE41-NEXT: psadbw %xmm0, %xmm1 7385; SSE41-NEXT: packuswb %xmm3, %xmm1 7386; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] 7387; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 7388; SSE41-NEXT: retq 7389; 7390; AVX1-LABEL: ult_6_v4i32: 7391; AVX1: # %bb.0: 7392; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7393; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 7394; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7395; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7396; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 7397; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 7398; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7399; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7400; AVX1-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 7401; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7402; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7403; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7404; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7405; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7406; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6] 7407; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7408; AVX1-NEXT: retq 7409; 7410; AVX2-LABEL: ult_6_v4i32: 7411; AVX2: # %bb.0: 7412; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7413; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 7414; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7415; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7416; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 7417; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 7418; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7419; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7420; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 7421; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7422; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7423; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7424; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7425; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7426; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] 7427; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7428; AVX2-NEXT: retq 7429; 7430; AVX512VPOPCNTDQ-LABEL: ult_6_v4i32: 7431; AVX512VPOPCNTDQ: # %bb.0: 7432; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7433; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 7434; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] 7435; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7436; AVX512VPOPCNTDQ-NEXT: vzeroupper 7437; AVX512VPOPCNTDQ-NEXT: retq 7438; 7439; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i32: 7440; AVX512VPOPCNTDQVL: # %bb.0: 7441; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 7442; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 7443; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd 
%xmm0, %xmm0, %xmm0 7444; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7445; AVX512VPOPCNTDQVL-NEXT: retq 7446; 7447; BITALG_NOVLX-LABEL: ult_6_v4i32: 7448; BITALG_NOVLX: # %bb.0: 7449; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7450; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 7451; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 7452; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7453; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7454; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7455; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7456; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7457; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] 7458; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7459; BITALG_NOVLX-NEXT: vzeroupper 7460; BITALG_NOVLX-NEXT: retq 7461; 7462; BITALG-LABEL: ult_6_v4i32: 7463; BITALG: # %bb.0: 7464; BITALG-NEXT: vpopcntb %xmm0, %xmm0 7465; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 7466; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7467; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7468; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7469; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7470; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7471; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 7472; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7473; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7474; BITALG-NEXT: retq 7475 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 7476 %3 = icmp ult <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6> 7477 %4 = sext <4 x i1> %3 to <4 x i32> 7478 ret <4 x i32> %4 7479} 7480 7481define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { 7482; SSE2-LABEL: ugt_6_v4i32: 7483; SSE2: # %bb.0: 7484; SSE2-NEXT: movdqa %xmm0, %xmm1 7485; SSE2-NEXT: psrlw $1, %xmm1 7486; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7487; SSE2-NEXT: psubb %xmm1, %xmm0 7488; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7489; SSE2-NEXT: movdqa %xmm0, %xmm2 7490; SSE2-NEXT: pand %xmm1, %xmm2 7491; SSE2-NEXT: psrlw $2, %xmm0 7492; SSE2-NEXT: pand %xmm1, %xmm0 7493; SSE2-NEXT: paddb %xmm2, %xmm0 7494; SSE2-NEXT: movdqa %xmm0, %xmm1 7495; SSE2-NEXT: psrlw $4, %xmm1 7496; SSE2-NEXT: paddb %xmm0, %xmm1 7497; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7498; SSE2-NEXT: pxor %xmm0, %xmm0 7499; SSE2-NEXT: movdqa %xmm1, %xmm2 7500; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7501; SSE2-NEXT: psadbw %xmm0, %xmm2 7502; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7503; SSE2-NEXT: psadbw %xmm0, %xmm1 7504; SSE2-NEXT: packuswb %xmm2, %xmm1 7505; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7506; SSE2-NEXT: movdqa %xmm1, %xmm0 7507; SSE2-NEXT: retq 7508; 7509; SSE3-LABEL: ugt_6_v4i32: 7510; SSE3: # %bb.0: 7511; SSE3-NEXT: movdqa %xmm0, %xmm1 7512; SSE3-NEXT: psrlw $1, %xmm1 7513; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7514; SSE3-NEXT: psubb %xmm1, %xmm0 7515; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7516; SSE3-NEXT: movdqa %xmm0, %xmm2 7517; SSE3-NEXT: pand %xmm1, %xmm2 7518; SSE3-NEXT: psrlw $2, %xmm0 7519; SSE3-NEXT: pand %xmm1, %xmm0 7520; SSE3-NEXT: paddb %xmm2, %xmm0 7521; SSE3-NEXT: movdqa %xmm0, %xmm1 7522; SSE3-NEXT: psrlw $4, %xmm1 7523; SSE3-NEXT: paddb %xmm0, %xmm1 7524; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7525; SSE3-NEXT: pxor %xmm0, %xmm0 7526; SSE3-NEXT: movdqa %xmm1, %xmm2 7527; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7528; SSE3-NEXT: psadbw %xmm0, %xmm2 7529; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7530; SSE3-NEXT: psadbw %xmm0, %xmm1 7531; SSE3-NEXT: packuswb %xmm2, %xmm1 7532; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7533; SSE3-NEXT: movdqa %xmm1, %xmm0 7534; SSE3-NEXT: retq 7535; 7536; SSSE3-LABEL: ugt_6_v4i32: 7537; SSSE3: # %bb.0: 7538; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7539; SSSE3-NEXT: movdqa %xmm0, %xmm3 7540; SSSE3-NEXT: pand %xmm2, %xmm3 7541; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7542; SSSE3-NEXT: movdqa %xmm1, %xmm4 7543; SSSE3-NEXT: pshufb %xmm3, %xmm4 7544; SSSE3-NEXT: psrlw $4, %xmm0 7545; SSSE3-NEXT: pand %xmm2, %xmm0 7546; SSSE3-NEXT: pshufb %xmm0, %xmm1 7547; SSSE3-NEXT: paddb %xmm4, %xmm1 7548; SSSE3-NEXT: pxor %xmm0, %xmm0 7549; SSSE3-NEXT: movdqa %xmm1, %xmm2 7550; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7551; SSSE3-NEXT: psadbw %xmm0, %xmm2 7552; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7553; SSSE3-NEXT: psadbw %xmm0, %xmm1 7554; SSSE3-NEXT: packuswb %xmm2, %xmm1 7555; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7556; SSSE3-NEXT: movdqa %xmm1, %xmm0 7557; SSSE3-NEXT: retq 7558; 7559; SSE41-LABEL: ugt_6_v4i32: 7560; SSE41: # %bb.0: 7561; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7562; SSE41-NEXT: movdqa %xmm0, %xmm2 7563; SSE41-NEXT: pand %xmm1, %xmm2 7564; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7565; SSE41-NEXT: movdqa %xmm3, %xmm4 7566; SSE41-NEXT: pshufb %xmm2, %xmm4 7567; SSE41-NEXT: psrlw $4, %xmm0 7568; SSE41-NEXT: pand %xmm1, %xmm0 7569; SSE41-NEXT: pshufb %xmm0, %xmm3 7570; SSE41-NEXT: paddb %xmm4, %xmm3 7571; SSE41-NEXT: pxor %xmm1, %xmm1 7572; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 7573; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 7574; SSE41-NEXT: psadbw %xmm1, %xmm3 7575; SSE41-NEXT: psadbw %xmm1, %xmm0 7576; SSE41-NEXT: packuswb %xmm3, %xmm0 7577; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 7578; SSE41-NEXT: retq 7579; 7580; AVX1-LABEL: ugt_6_v4i32: 7581; AVX1: # %bb.0: 7582; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7583; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 7584; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7585; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7586; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 7587; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 7588; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7589; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7590; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 7591; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7592; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7593; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7594; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7595; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7596; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 7597; AVX1-NEXT: retq 7598; 7599; AVX2-LABEL: ugt_6_v4i32: 7600; AVX2: # %bb.0: 7601; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7602; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 7603; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7604; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7605; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 7606; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 7607; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7608; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7609; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 7610; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7611; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7612; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7613; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7614; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7615; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] 7616; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 7617; AVX2-NEXT: retq 7618; 7619; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i32: 7620; AVX512VPOPCNTDQ: # %bb.0: 7621; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7622; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 7623; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] 7624; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 7625; AVX512VPOPCNTDQ-NEXT: vzeroupper 7626; AVX512VPOPCNTDQ-NEXT: retq 7627; 7628; 
AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i32: 7629; AVX512VPOPCNTDQVL: # %bb.0: 7630; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 7631; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 7632; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7633; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7634; AVX512VPOPCNTDQVL-NEXT: retq 7635; 7636; BITALG_NOVLX-LABEL: ugt_6_v4i32: 7637; BITALG_NOVLX: # %bb.0: 7638; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7639; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 7640; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 7641; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7642; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7643; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7644; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7645; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7646; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] 7647; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 7648; BITALG_NOVLX-NEXT: vzeroupper 7649; BITALG_NOVLX-NEXT: retq 7650; 7651; BITALG-LABEL: ugt_6_v4i32: 7652; BITALG: # %bb.0: 7653; BITALG-NEXT: vpopcntb %xmm0, %xmm0 7654; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 7655; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7656; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7657; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7658; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7659; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7660; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 7661; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7662; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7663; BITALG-NEXT: retq 7664 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 7665 %3 = icmp ugt <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6> 7666 %4 = sext <4 x i1> %3 to <4 x i32> 7667 ret <4 x i32> %4 7668} 7669 7670define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { 7671; SSE2-LABEL: ult_7_v4i32: 
7672; SSE2: # %bb.0: 7673; SSE2-NEXT: movdqa %xmm0, %xmm1 7674; SSE2-NEXT: psrlw $1, %xmm1 7675; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7676; SSE2-NEXT: psubb %xmm1, %xmm0 7677; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7678; SSE2-NEXT: movdqa %xmm0, %xmm2 7679; SSE2-NEXT: pand %xmm1, %xmm2 7680; SSE2-NEXT: psrlw $2, %xmm0 7681; SSE2-NEXT: pand %xmm1, %xmm0 7682; SSE2-NEXT: paddb %xmm2, %xmm0 7683; SSE2-NEXT: movdqa %xmm0, %xmm1 7684; SSE2-NEXT: psrlw $4, %xmm1 7685; SSE2-NEXT: paddb %xmm0, %xmm1 7686; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7687; SSE2-NEXT: pxor %xmm0, %xmm0 7688; SSE2-NEXT: movdqa %xmm1, %xmm2 7689; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7690; SSE2-NEXT: psadbw %xmm0, %xmm2 7691; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7692; SSE2-NEXT: psadbw %xmm0, %xmm1 7693; SSE2-NEXT: packuswb %xmm2, %xmm1 7694; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] 7695; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 7696; SSE2-NEXT: retq 7697; 7698; SSE3-LABEL: ult_7_v4i32: 7699; SSE3: # %bb.0: 7700; SSE3-NEXT: movdqa %xmm0, %xmm1 7701; SSE3-NEXT: psrlw $1, %xmm1 7702; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7703; SSE3-NEXT: psubb %xmm1, %xmm0 7704; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7705; SSE3-NEXT: movdqa %xmm0, %xmm2 7706; SSE3-NEXT: pand %xmm1, %xmm2 7707; SSE3-NEXT: psrlw $2, %xmm0 7708; SSE3-NEXT: pand %xmm1, %xmm0 7709; SSE3-NEXT: paddb %xmm2, %xmm0 7710; SSE3-NEXT: movdqa %xmm0, %xmm1 7711; SSE3-NEXT: psrlw $4, %xmm1 7712; SSE3-NEXT: paddb %xmm0, %xmm1 7713; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7714; SSE3-NEXT: pxor %xmm0, %xmm0 7715; SSE3-NEXT: movdqa %xmm1, %xmm2 7716; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7717; SSE3-NEXT: psadbw %xmm0, %xmm2 7718; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7719; SSE3-NEXT: psadbw %xmm0, %xmm1 7720; SSE3-NEXT: packuswb %xmm2, %xmm1 7721; SSE3-NEXT: 
movdqa {{.*#+}} xmm0 = [7,7,7,7] 7722; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 7723; SSE3-NEXT: retq 7724; 7725; SSSE3-LABEL: ult_7_v4i32: 7726; SSSE3: # %bb.0: 7727; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7728; SSSE3-NEXT: movdqa %xmm0, %xmm2 7729; SSSE3-NEXT: pand %xmm1, %xmm2 7730; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7731; SSSE3-NEXT: movdqa %xmm3, %xmm4 7732; SSSE3-NEXT: pshufb %xmm2, %xmm4 7733; SSSE3-NEXT: psrlw $4, %xmm0 7734; SSSE3-NEXT: pand %xmm1, %xmm0 7735; SSSE3-NEXT: pshufb %xmm0, %xmm3 7736; SSSE3-NEXT: paddb %xmm4, %xmm3 7737; SSSE3-NEXT: pxor %xmm0, %xmm0 7738; SSSE3-NEXT: movdqa %xmm3, %xmm1 7739; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 7740; SSSE3-NEXT: psadbw %xmm0, %xmm1 7741; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 7742; SSSE3-NEXT: psadbw %xmm0, %xmm3 7743; SSSE3-NEXT: packuswb %xmm1, %xmm3 7744; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] 7745; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 7746; SSSE3-NEXT: retq 7747; 7748; SSE41-LABEL: ult_7_v4i32: 7749; SSE41: # %bb.0: 7750; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7751; SSE41-NEXT: movdqa %xmm0, %xmm2 7752; SSE41-NEXT: pand %xmm1, %xmm2 7753; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7754; SSE41-NEXT: movdqa %xmm3, %xmm4 7755; SSE41-NEXT: pshufb %xmm2, %xmm4 7756; SSE41-NEXT: psrlw $4, %xmm0 7757; SSE41-NEXT: pand %xmm1, %xmm0 7758; SSE41-NEXT: pshufb %xmm0, %xmm3 7759; SSE41-NEXT: paddb %xmm4, %xmm3 7760; SSE41-NEXT: pxor %xmm0, %xmm0 7761; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 7762; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 7763; SSE41-NEXT: psadbw %xmm0, %xmm3 7764; SSE41-NEXT: psadbw %xmm0, %xmm1 7765; SSE41-NEXT: packuswb %xmm3, %xmm1 7766; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] 7767; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 7768; SSE41-NEXT: retq 
7769; 7770; AVX1-LABEL: ult_7_v4i32: 7771; AVX1: # %bb.0: 7772; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7773; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 7774; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7775; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7776; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 7777; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 7778; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7779; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7780; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 7781; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7782; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7783; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7784; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7785; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7786; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] 7787; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7788; AVX1-NEXT: retq 7789; 7790; AVX2-LABEL: ult_7_v4i32: 7791; AVX2: # %bb.0: 7792; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7793; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 7794; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7795; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7796; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 7797; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 7798; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7799; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7800; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 7801; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7802; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7803; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7804; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7805; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7806; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] 7807; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7808; AVX2-NEXT: retq 7809; 7810; AVX512VPOPCNTDQ-LABEL: ult_7_v4i32: 7811; AVX512VPOPCNTDQ: # %bb.0: 7812; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed 
$xmm0 def $zmm0 7813; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 7814; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] 7815; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7816; AVX512VPOPCNTDQ-NEXT: vzeroupper 7817; AVX512VPOPCNTDQ-NEXT: retq 7818; 7819; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i32: 7820; AVX512VPOPCNTDQVL: # %bb.0: 7821; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 7822; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 7823; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7824; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7825; AVX512VPOPCNTDQVL-NEXT: retq 7826; 7827; BITALG_NOVLX-LABEL: ult_7_v4i32: 7828; BITALG_NOVLX: # %bb.0: 7829; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7830; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 7831; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 7832; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7833; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7834; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7835; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7836; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7837; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] 7838; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 7839; BITALG_NOVLX-NEXT: vzeroupper 7840; BITALG_NOVLX-NEXT: retq 7841; 7842; BITALG-LABEL: ult_7_v4i32: 7843; BITALG: # %bb.0: 7844; BITALG-NEXT: vpopcntb %xmm0, %xmm0 7845; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 7846; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7847; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7848; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7849; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7850; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7851; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 7852; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 7853; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 7854; BITALG-NEXT: retq 7855 
%2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 7856 %3 = icmp ult <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7> 7857 %4 = sext <4 x i1> %3 to <4 x i32> 7858 ret <4 x i32> %4 7859} 7860 7861define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { 7862; SSE2-LABEL: ugt_7_v4i32: 7863; SSE2: # %bb.0: 7864; SSE2-NEXT: movdqa %xmm0, %xmm1 7865; SSE2-NEXT: psrlw $1, %xmm1 7866; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7867; SSE2-NEXT: psubb %xmm1, %xmm0 7868; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7869; SSE2-NEXT: movdqa %xmm0, %xmm2 7870; SSE2-NEXT: pand %xmm1, %xmm2 7871; SSE2-NEXT: psrlw $2, %xmm0 7872; SSE2-NEXT: pand %xmm1, %xmm0 7873; SSE2-NEXT: paddb %xmm2, %xmm0 7874; SSE2-NEXT: movdqa %xmm0, %xmm1 7875; SSE2-NEXT: psrlw $4, %xmm1 7876; SSE2-NEXT: paddb %xmm0, %xmm1 7877; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 7878; SSE2-NEXT: pxor %xmm0, %xmm0 7879; SSE2-NEXT: movdqa %xmm1, %xmm2 7880; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7881; SSE2-NEXT: psadbw %xmm0, %xmm2 7882; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7883; SSE2-NEXT: psadbw %xmm0, %xmm1 7884; SSE2-NEXT: packuswb %xmm2, %xmm1 7885; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7886; SSE2-NEXT: movdqa %xmm1, %xmm0 7887; SSE2-NEXT: retq 7888; 7889; SSE3-LABEL: ugt_7_v4i32: 7890; SSE3: # %bb.0: 7891; SSE3-NEXT: movdqa %xmm0, %xmm1 7892; SSE3-NEXT: psrlw $1, %xmm1 7893; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7894; SSE3-NEXT: psubb %xmm1, %xmm0 7895; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 7896; SSE3-NEXT: movdqa %xmm0, %xmm2 7897; SSE3-NEXT: pand %xmm1, %xmm2 7898; SSE3-NEXT: psrlw $2, %xmm0 7899; SSE3-NEXT: pand %xmm1, %xmm0 7900; SSE3-NEXT: paddb %xmm2, %xmm0 7901; SSE3-NEXT: movdqa %xmm0, %xmm1 7902; SSE3-NEXT: psrlw $4, %xmm1 7903; SSE3-NEXT: paddb %xmm0, %xmm1 7904; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 7905; SSE3-NEXT: pxor %xmm0, %xmm0 7906; SSE3-NEXT: movdqa %xmm1, %xmm2 7907; 
SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7908; SSE3-NEXT: psadbw %xmm0, %xmm2 7909; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7910; SSE3-NEXT: psadbw %xmm0, %xmm1 7911; SSE3-NEXT: packuswb %xmm2, %xmm1 7912; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7913; SSE3-NEXT: movdqa %xmm1, %xmm0 7914; SSE3-NEXT: retq 7915; 7916; SSSE3-LABEL: ugt_7_v4i32: 7917; SSSE3: # %bb.0: 7918; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7919; SSSE3-NEXT: movdqa %xmm0, %xmm3 7920; SSSE3-NEXT: pand %xmm2, %xmm3 7921; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7922; SSSE3-NEXT: movdqa %xmm1, %xmm4 7923; SSSE3-NEXT: pshufb %xmm3, %xmm4 7924; SSSE3-NEXT: psrlw $4, %xmm0 7925; SSSE3-NEXT: pand %xmm2, %xmm0 7926; SSSE3-NEXT: pshufb %xmm0, %xmm1 7927; SSSE3-NEXT: paddb %xmm4, %xmm1 7928; SSSE3-NEXT: pxor %xmm0, %xmm0 7929; SSSE3-NEXT: movdqa %xmm1, %xmm2 7930; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 7931; SSSE3-NEXT: psadbw %xmm0, %xmm2 7932; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7933; SSSE3-NEXT: psadbw %xmm0, %xmm1 7934; SSSE3-NEXT: packuswb %xmm2, %xmm1 7935; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 7936; SSSE3-NEXT: movdqa %xmm1, %xmm0 7937; SSSE3-NEXT: retq 7938; 7939; SSE41-LABEL: ugt_7_v4i32: 7940; SSE41: # %bb.0: 7941; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7942; SSE41-NEXT: movdqa %xmm0, %xmm2 7943; SSE41-NEXT: pand %xmm1, %xmm2 7944; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7945; SSE41-NEXT: movdqa %xmm3, %xmm4 7946; SSE41-NEXT: pshufb %xmm2, %xmm4 7947; SSE41-NEXT: psrlw $4, %xmm0 7948; SSE41-NEXT: pand %xmm1, %xmm0 7949; SSE41-NEXT: pshufb %xmm0, %xmm3 7950; SSE41-NEXT: paddb %xmm4, %xmm3 7951; SSE41-NEXT: pxor %xmm1, %xmm1 7952; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 7953; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 
= xmm3[2],xmm1[2],xmm3[3],xmm1[3] 7954; SSE41-NEXT: psadbw %xmm1, %xmm3 7955; SSE41-NEXT: psadbw %xmm1, %xmm0 7956; SSE41-NEXT: packuswb %xmm3, %xmm0 7957; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 7958; SSE41-NEXT: retq 7959; 7960; AVX1-LABEL: ugt_7_v4i32: 7961; AVX1: # %bb.0: 7962; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7963; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 7964; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7965; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7966; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 7967; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 7968; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7969; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7970; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 7971; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7972; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7973; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7974; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7975; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7976; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 7977; AVX1-NEXT: retq 7978; 7979; AVX2-LABEL: ugt_7_v4i32: 7980; AVX2: # %bb.0: 7981; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 7982; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 7983; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 7984; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 7985; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 7986; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 7987; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 7988; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 7989; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 7990; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7991; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 7992; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7993; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 7994; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 7995; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] 7996; AVX2-NEXT: vpcmpgtd 
%xmm1, %xmm0, %xmm0 7997; AVX2-NEXT: retq 7998; 7999; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i32: 8000; AVX512VPOPCNTDQ: # %bb.0: 8001; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8002; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 8003; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] 8004; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8005; AVX512VPOPCNTDQ-NEXT: vzeroupper 8006; AVX512VPOPCNTDQ-NEXT: retq 8007; 8008; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i32: 8009; AVX512VPOPCNTDQVL: # %bb.0: 8010; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 8011; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 8012; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8013; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8014; AVX512VPOPCNTDQVL-NEXT: retq 8015; 8016; BITALG_NOVLX-LABEL: ugt_7_v4i32: 8017; BITALG_NOVLX: # %bb.0: 8018; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8019; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 8020; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 8021; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8022; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8023; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8024; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8025; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8026; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] 8027; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8028; BITALG_NOVLX-NEXT: vzeroupper 8029; BITALG_NOVLX-NEXT: retq 8030; 8031; BITALG-LABEL: ugt_7_v4i32: 8032; BITALG: # %bb.0: 8033; BITALG-NEXT: vpopcntb %xmm0, %xmm0 8034; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 8035; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8036; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8037; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8038; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8039; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8040; 
BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 8041; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8042; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8043; BITALG-NEXT: retq 8044 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 8045 %3 = icmp ugt <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7> 8046 %4 = sext <4 x i1> %3 to <4 x i32> 8047 ret <4 x i32> %4 8048} 8049 8050define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { 8051; SSE2-LABEL: ult_8_v4i32: 8052; SSE2: # %bb.0: 8053; SSE2-NEXT: movdqa %xmm0, %xmm1 8054; SSE2-NEXT: psrlw $1, %xmm1 8055; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8056; SSE2-NEXT: psubb %xmm1, %xmm0 8057; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8058; SSE2-NEXT: movdqa %xmm0, %xmm2 8059; SSE2-NEXT: pand %xmm1, %xmm2 8060; SSE2-NEXT: psrlw $2, %xmm0 8061; SSE2-NEXT: pand %xmm1, %xmm0 8062; SSE2-NEXT: paddb %xmm2, %xmm0 8063; SSE2-NEXT: movdqa %xmm0, %xmm1 8064; SSE2-NEXT: psrlw $4, %xmm1 8065; SSE2-NEXT: paddb %xmm0, %xmm1 8066; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8067; SSE2-NEXT: pxor %xmm0, %xmm0 8068; SSE2-NEXT: movdqa %xmm1, %xmm2 8069; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8070; SSE2-NEXT: psadbw %xmm0, %xmm2 8071; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8072; SSE2-NEXT: psadbw %xmm0, %xmm1 8073; SSE2-NEXT: packuswb %xmm2, %xmm1 8074; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] 8075; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 8076; SSE2-NEXT: retq 8077; 8078; SSE3-LABEL: ult_8_v4i32: 8079; SSE3: # %bb.0: 8080; SSE3-NEXT: movdqa %xmm0, %xmm1 8081; SSE3-NEXT: psrlw $1, %xmm1 8082; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8083; SSE3-NEXT: psubb %xmm1, %xmm0 8084; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8085; SSE3-NEXT: movdqa %xmm0, %xmm2 8086; SSE3-NEXT: pand %xmm1, %xmm2 8087; SSE3-NEXT: psrlw $2, %xmm0 8088; SSE3-NEXT: pand %xmm1, %xmm0 8089; SSE3-NEXT: paddb %xmm2, %xmm0 8090; SSE3-NEXT: movdqa %xmm0, %xmm1 8091; 
SSE3-NEXT: psrlw $4, %xmm1 8092; SSE3-NEXT: paddb %xmm0, %xmm1 8093; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8094; SSE3-NEXT: pxor %xmm0, %xmm0 8095; SSE3-NEXT: movdqa %xmm1, %xmm2 8096; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8097; SSE3-NEXT: psadbw %xmm0, %xmm2 8098; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8099; SSE3-NEXT: psadbw %xmm0, %xmm1 8100; SSE3-NEXT: packuswb %xmm2, %xmm1 8101; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] 8102; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 8103; SSE3-NEXT: retq 8104; 8105; SSSE3-LABEL: ult_8_v4i32: 8106; SSSE3: # %bb.0: 8107; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8108; SSSE3-NEXT: movdqa %xmm0, %xmm2 8109; SSSE3-NEXT: pand %xmm1, %xmm2 8110; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8111; SSSE3-NEXT: movdqa %xmm3, %xmm4 8112; SSSE3-NEXT: pshufb %xmm2, %xmm4 8113; SSSE3-NEXT: psrlw $4, %xmm0 8114; SSSE3-NEXT: pand %xmm1, %xmm0 8115; SSSE3-NEXT: pshufb %xmm0, %xmm3 8116; SSSE3-NEXT: paddb %xmm4, %xmm3 8117; SSSE3-NEXT: pxor %xmm0, %xmm0 8118; SSSE3-NEXT: movdqa %xmm3, %xmm1 8119; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 8120; SSSE3-NEXT: psadbw %xmm0, %xmm1 8121; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 8122; SSSE3-NEXT: psadbw %xmm0, %xmm3 8123; SSSE3-NEXT: packuswb %xmm1, %xmm3 8124; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] 8125; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 8126; SSSE3-NEXT: retq 8127; 8128; SSE41-LABEL: ult_8_v4i32: 8129; SSE41: # %bb.0: 8130; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8131; SSE41-NEXT: movdqa %xmm0, %xmm2 8132; SSE41-NEXT: pand %xmm1, %xmm2 8133; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8134; SSE41-NEXT: movdqa %xmm3, %xmm4 8135; SSE41-NEXT: pshufb %xmm2, %xmm4 8136; SSE41-NEXT: psrlw $4, %xmm0 8137; SSE41-NEXT: pand %xmm1, %xmm0 8138; SSE41-NEXT: pshufb 
%xmm0, %xmm3 8139; SSE41-NEXT: paddb %xmm4, %xmm3 8140; SSE41-NEXT: pxor %xmm0, %xmm0 8141; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 8142; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 8143; SSE41-NEXT: psadbw %xmm0, %xmm3 8144; SSE41-NEXT: psadbw %xmm0, %xmm1 8145; SSE41-NEXT: packuswb %xmm3, %xmm1 8146; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] 8147; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 8148; SSE41-NEXT: retq 8149; 8150; AVX1-LABEL: ult_8_v4i32: 8151; AVX1: # %bb.0: 8152; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8153; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 8154; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8155; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8156; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 8157; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 8158; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8159; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8160; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 8161; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8162; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8163; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8164; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8165; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8166; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8] 8167; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8168; AVX1-NEXT: retq 8169; 8170; AVX2-LABEL: ult_8_v4i32: 8171; AVX2: # %bb.0: 8172; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8173; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 8174; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8175; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8176; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 8177; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 8178; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8179; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8180; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 8181; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8182; 
AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8183; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8184; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8185; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8186; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] 8187; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8188; AVX2-NEXT: retq 8189; 8190; AVX512VPOPCNTDQ-LABEL: ult_8_v4i32: 8191; AVX512VPOPCNTDQ: # %bb.0: 8192; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8193; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 8194; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] 8195; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8196; AVX512VPOPCNTDQ-NEXT: vzeroupper 8197; AVX512VPOPCNTDQ-NEXT: retq 8198; 8199; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i32: 8200; AVX512VPOPCNTDQVL: # %bb.0: 8201; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 8202; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 8203; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8204; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8205; AVX512VPOPCNTDQVL-NEXT: retq 8206; 8207; BITALG_NOVLX-LABEL: ult_8_v4i32: 8208; BITALG_NOVLX: # %bb.0: 8209; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8210; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 8211; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 8212; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8213; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8214; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8215; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8216; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8217; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] 8218; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8219; BITALG_NOVLX-NEXT: vzeroupper 8220; BITALG_NOVLX-NEXT: retq 8221; 8222; BITALG-LABEL: ult_8_v4i32: 8223; BITALG: # %bb.0: 8224; BITALG-NEXT: vpopcntb %xmm0, %xmm0 8225; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 8226; BITALG-NEXT: 
vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8227; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8228; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8229; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8230; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8231; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 8232; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8233; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8234; BITALG-NEXT: retq 8235 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 8236 %3 = icmp ult <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8> 8237 %4 = sext <4 x i1> %3 to <4 x i32> 8238 ret <4 x i32> %4 8239} 8240 8241define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { 8242; SSE2-LABEL: ugt_8_v4i32: 8243; SSE2: # %bb.0: 8244; SSE2-NEXT: movdqa %xmm0, %xmm1 8245; SSE2-NEXT: psrlw $1, %xmm1 8246; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8247; SSE2-NEXT: psubb %xmm1, %xmm0 8248; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8249; SSE2-NEXT: movdqa %xmm0, %xmm2 8250; SSE2-NEXT: pand %xmm1, %xmm2 8251; SSE2-NEXT: psrlw $2, %xmm0 8252; SSE2-NEXT: pand %xmm1, %xmm0 8253; SSE2-NEXT: paddb %xmm2, %xmm0 8254; SSE2-NEXT: movdqa %xmm0, %xmm1 8255; SSE2-NEXT: psrlw $4, %xmm1 8256; SSE2-NEXT: paddb %xmm0, %xmm1 8257; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8258; SSE2-NEXT: pxor %xmm0, %xmm0 8259; SSE2-NEXT: movdqa %xmm1, %xmm2 8260; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8261; SSE2-NEXT: psadbw %xmm0, %xmm2 8262; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8263; SSE2-NEXT: psadbw %xmm0, %xmm1 8264; SSE2-NEXT: packuswb %xmm2, %xmm1 8265; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 8266; SSE2-NEXT: movdqa %xmm1, %xmm0 8267; SSE2-NEXT: retq 8268; 8269; SSE3-LABEL: ugt_8_v4i32: 8270; SSE3: # %bb.0: 8271; SSE3-NEXT: movdqa %xmm0, %xmm1 8272; SSE3-NEXT: psrlw $1, %xmm1 8273; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8274; SSE3-NEXT: psubb %xmm1, %xmm0 8275; SSE3-NEXT: movdqa {{.*#+}} 
xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8276; SSE3-NEXT: movdqa %xmm0, %xmm2 8277; SSE3-NEXT: pand %xmm1, %xmm2 8278; SSE3-NEXT: psrlw $2, %xmm0 8279; SSE3-NEXT: pand %xmm1, %xmm0 8280; SSE3-NEXT: paddb %xmm2, %xmm0 8281; SSE3-NEXT: movdqa %xmm0, %xmm1 8282; SSE3-NEXT: psrlw $4, %xmm1 8283; SSE3-NEXT: paddb %xmm0, %xmm1 8284; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8285; SSE3-NEXT: pxor %xmm0, %xmm0 8286; SSE3-NEXT: movdqa %xmm1, %xmm2 8287; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8288; SSE3-NEXT: psadbw %xmm0, %xmm2 8289; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8290; SSE3-NEXT: psadbw %xmm0, %xmm1 8291; SSE3-NEXT: packuswb %xmm2, %xmm1 8292; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 8293; SSE3-NEXT: movdqa %xmm1, %xmm0 8294; SSE3-NEXT: retq 8295; 8296; SSSE3-LABEL: ugt_8_v4i32: 8297; SSSE3: # %bb.0: 8298; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8299; SSSE3-NEXT: movdqa %xmm0, %xmm3 8300; SSSE3-NEXT: pand %xmm2, %xmm3 8301; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8302; SSSE3-NEXT: movdqa %xmm1, %xmm4 8303; SSSE3-NEXT: pshufb %xmm3, %xmm4 8304; SSSE3-NEXT: psrlw $4, %xmm0 8305; SSSE3-NEXT: pand %xmm2, %xmm0 8306; SSSE3-NEXT: pshufb %xmm0, %xmm1 8307; SSSE3-NEXT: paddb %xmm4, %xmm1 8308; SSSE3-NEXT: pxor %xmm0, %xmm0 8309; SSSE3-NEXT: movdqa %xmm1, %xmm2 8310; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8311; SSSE3-NEXT: psadbw %xmm0, %xmm2 8312; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8313; SSSE3-NEXT: psadbw %xmm0, %xmm1 8314; SSSE3-NEXT: packuswb %xmm2, %xmm1 8315; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 8316; SSSE3-NEXT: movdqa %xmm1, %xmm0 8317; SSSE3-NEXT: retq 8318; 8319; SSE41-LABEL: ugt_8_v4i32: 8320; SSE41: # %bb.0: 8321; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8322; SSE41-NEXT: movdqa %xmm0, %xmm2 8323; SSE41-NEXT: 
pand %xmm1, %xmm2 8324; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8325; SSE41-NEXT: movdqa %xmm3, %xmm4 8326; SSE41-NEXT: pshufb %xmm2, %xmm4 8327; SSE41-NEXT: psrlw $4, %xmm0 8328; SSE41-NEXT: pand %xmm1, %xmm0 8329; SSE41-NEXT: pshufb %xmm0, %xmm3 8330; SSE41-NEXT: paddb %xmm4, %xmm3 8331; SSE41-NEXT: pxor %xmm1, %xmm1 8332; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 8333; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 8334; SSE41-NEXT: psadbw %xmm1, %xmm3 8335; SSE41-NEXT: psadbw %xmm1, %xmm0 8336; SSE41-NEXT: packuswb %xmm3, %xmm0 8337; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 8338; SSE41-NEXT: retq 8339; 8340; AVX1-LABEL: ugt_8_v4i32: 8341; AVX1: # %bb.0: 8342; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8343; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 8344; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8345; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8346; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 8347; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 8348; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8349; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8350; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 8351; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8352; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8353; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8354; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8355; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8356; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 8357; AVX1-NEXT: retq 8358; 8359; AVX2-LABEL: ugt_8_v4i32: 8360; AVX2: # %bb.0: 8361; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8362; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 8363; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8364; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8365; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 8366; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 8367; AVX2-NEXT: vpshufb %xmm0, %xmm3, 
%xmm0 8368; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8369; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 8370; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8371; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8372; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8373; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8374; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8375; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] 8376; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8377; AVX2-NEXT: retq 8378; 8379; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i32: 8380; AVX512VPOPCNTDQ: # %bb.0: 8381; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8382; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 8383; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] 8384; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8385; AVX512VPOPCNTDQ-NEXT: vzeroupper 8386; AVX512VPOPCNTDQ-NEXT: retq 8387; 8388; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i32: 8389; AVX512VPOPCNTDQVL: # %bb.0: 8390; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 8391; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 8392; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8393; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8394; AVX512VPOPCNTDQVL-NEXT: retq 8395; 8396; BITALG_NOVLX-LABEL: ugt_8_v4i32: 8397; BITALG_NOVLX: # %bb.0: 8398; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8399; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 8400; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 8401; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8402; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8403; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8404; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8405; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8406; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] 8407; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8408; BITALG_NOVLX-NEXT: vzeroupper 8409; BITALG_NOVLX-NEXT: 
retq 8410; 8411; BITALG-LABEL: ugt_8_v4i32: 8412; BITALG: # %bb.0: 8413; BITALG-NEXT: vpopcntb %xmm0, %xmm0 8414; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 8415; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8416; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8417; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8418; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8419; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8420; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 8421; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8422; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8423; BITALG-NEXT: retq 8424 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 8425 %3 = icmp ugt <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8> 8426 %4 = sext <4 x i1> %3 to <4 x i32> 8427 ret <4 x i32> %4 8428} 8429 8430define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { 8431; SSE2-LABEL: ult_9_v4i32: 8432; SSE2: # %bb.0: 8433; SSE2-NEXT: movdqa %xmm0, %xmm1 8434; SSE2-NEXT: psrlw $1, %xmm1 8435; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8436; SSE2-NEXT: psubb %xmm1, %xmm0 8437; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8438; SSE2-NEXT: movdqa %xmm0, %xmm2 8439; SSE2-NEXT: pand %xmm1, %xmm2 8440; SSE2-NEXT: psrlw $2, %xmm0 8441; SSE2-NEXT: pand %xmm1, %xmm0 8442; SSE2-NEXT: paddb %xmm2, %xmm0 8443; SSE2-NEXT: movdqa %xmm0, %xmm1 8444; SSE2-NEXT: psrlw $4, %xmm1 8445; SSE2-NEXT: paddb %xmm0, %xmm1 8446; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8447; SSE2-NEXT: pxor %xmm0, %xmm0 8448; SSE2-NEXT: movdqa %xmm1, %xmm2 8449; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8450; SSE2-NEXT: psadbw %xmm0, %xmm2 8451; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8452; SSE2-NEXT: psadbw %xmm0, %xmm1 8453; SSE2-NEXT: packuswb %xmm2, %xmm1 8454; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] 8455; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 8456; SSE2-NEXT: retq 8457; 8458; SSE3-LABEL: ult_9_v4i32: 8459; SSE3: # %bb.0: 8460; 
SSE3-NEXT: movdqa %xmm0, %xmm1 8461; SSE3-NEXT: psrlw $1, %xmm1 8462; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8463; SSE3-NEXT: psubb %xmm1, %xmm0 8464; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8465; SSE3-NEXT: movdqa %xmm0, %xmm2 8466; SSE3-NEXT: pand %xmm1, %xmm2 8467; SSE3-NEXT: psrlw $2, %xmm0 8468; SSE3-NEXT: pand %xmm1, %xmm0 8469; SSE3-NEXT: paddb %xmm2, %xmm0 8470; SSE3-NEXT: movdqa %xmm0, %xmm1 8471; SSE3-NEXT: psrlw $4, %xmm1 8472; SSE3-NEXT: paddb %xmm0, %xmm1 8473; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8474; SSE3-NEXT: pxor %xmm0, %xmm0 8475; SSE3-NEXT: movdqa %xmm1, %xmm2 8476; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8477; SSE3-NEXT: psadbw %xmm0, %xmm2 8478; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8479; SSE3-NEXT: psadbw %xmm0, %xmm1 8480; SSE3-NEXT: packuswb %xmm2, %xmm1 8481; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] 8482; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 8483; SSE3-NEXT: retq 8484; 8485; SSSE3-LABEL: ult_9_v4i32: 8486; SSSE3: # %bb.0: 8487; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8488; SSSE3-NEXT: movdqa %xmm0, %xmm2 8489; SSSE3-NEXT: pand %xmm1, %xmm2 8490; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8491; SSSE3-NEXT: movdqa %xmm3, %xmm4 8492; SSSE3-NEXT: pshufb %xmm2, %xmm4 8493; SSSE3-NEXT: psrlw $4, %xmm0 8494; SSSE3-NEXT: pand %xmm1, %xmm0 8495; SSSE3-NEXT: pshufb %xmm0, %xmm3 8496; SSSE3-NEXT: paddb %xmm4, %xmm3 8497; SSSE3-NEXT: pxor %xmm0, %xmm0 8498; SSSE3-NEXT: movdqa %xmm3, %xmm1 8499; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 8500; SSSE3-NEXT: psadbw %xmm0, %xmm1 8501; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 8502; SSSE3-NEXT: psadbw %xmm0, %xmm3 8503; SSSE3-NEXT: packuswb %xmm1, %xmm3 8504; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] 8505; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 8506; SSSE3-NEXT: retq 8507; 8508; 
SSE41-LABEL: ult_9_v4i32: 8509; SSE41: # %bb.0: 8510; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8511; SSE41-NEXT: movdqa %xmm0, %xmm2 8512; SSE41-NEXT: pand %xmm1, %xmm2 8513; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8514; SSE41-NEXT: movdqa %xmm3, %xmm4 8515; SSE41-NEXT: pshufb %xmm2, %xmm4 8516; SSE41-NEXT: psrlw $4, %xmm0 8517; SSE41-NEXT: pand %xmm1, %xmm0 8518; SSE41-NEXT: pshufb %xmm0, %xmm3 8519; SSE41-NEXT: paddb %xmm4, %xmm3 8520; SSE41-NEXT: pxor %xmm0, %xmm0 8521; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 8522; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 8523; SSE41-NEXT: psadbw %xmm0, %xmm3 8524; SSE41-NEXT: psadbw %xmm0, %xmm1 8525; SSE41-NEXT: packuswb %xmm3, %xmm1 8526; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] 8527; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 8528; SSE41-NEXT: retq 8529; 8530; AVX1-LABEL: ult_9_v4i32: 8531; AVX1: # %bb.0: 8532; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8533; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 8534; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8535; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8536; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 8537; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 8538; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8539; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8540; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 8541; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8542; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8543; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8544; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8545; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8546; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9] 8547; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8548; AVX1-NEXT: retq 8549; 8550; AVX2-LABEL: ult_9_v4i32: 8551; AVX2: # %bb.0: 8552; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 
8553; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 8554; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8555; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8556; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 8557; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 8558; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8559; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8560; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 8561; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8562; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8563; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8564; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8565; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8566; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] 8567; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8568; AVX2-NEXT: retq 8569; 8570; AVX512VPOPCNTDQ-LABEL: ult_9_v4i32: 8571; AVX512VPOPCNTDQ: # %bb.0: 8572; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8573; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 8574; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] 8575; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8576; AVX512VPOPCNTDQ-NEXT: vzeroupper 8577; AVX512VPOPCNTDQ-NEXT: retq 8578; 8579; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i32: 8580; AVX512VPOPCNTDQVL: # %bb.0: 8581; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 8582; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 8583; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8584; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8585; AVX512VPOPCNTDQVL-NEXT: retq 8586; 8587; BITALG_NOVLX-LABEL: ult_9_v4i32: 8588; BITALG_NOVLX: # %bb.0: 8589; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8590; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 8591; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 8592; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8593; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8594; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 
8595; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8596; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8597; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] 8598; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8599; BITALG_NOVLX-NEXT: vzeroupper 8600; BITALG_NOVLX-NEXT: retq 8601; 8602; BITALG-LABEL: ult_9_v4i32: 8603; BITALG: # %bb.0: 8604; BITALG-NEXT: vpopcntb %xmm0, %xmm0 8605; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 8606; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8607; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8608; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8609; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8610; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8611; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 8612; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8613; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8614; BITALG-NEXT: retq 8615 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 8616 %3 = icmp ult <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9> 8617 %4 = sext <4 x i1> %3 to <4 x i32> 8618 ret <4 x i32> %4 8619} 8620 8621define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { 8622; SSE2-LABEL: ugt_9_v4i32: 8623; SSE2: # %bb.0: 8624; SSE2-NEXT: movdqa %xmm0, %xmm1 8625; SSE2-NEXT: psrlw $1, %xmm1 8626; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8627; SSE2-NEXT: psubb %xmm1, %xmm0 8628; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8629; SSE2-NEXT: movdqa %xmm0, %xmm2 8630; SSE2-NEXT: pand %xmm1, %xmm2 8631; SSE2-NEXT: psrlw $2, %xmm0 8632; SSE2-NEXT: pand %xmm1, %xmm0 8633; SSE2-NEXT: paddb %xmm2, %xmm0 8634; SSE2-NEXT: movdqa %xmm0, %xmm1 8635; SSE2-NEXT: psrlw $4, %xmm1 8636; SSE2-NEXT: paddb %xmm0, %xmm1 8637; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8638; SSE2-NEXT: pxor %xmm0, %xmm0 8639; SSE2-NEXT: movdqa %xmm1, %xmm2 8640; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8641; SSE2-NEXT: psadbw %xmm0, %xmm2 8642; SSE2-NEXT: punpckldq {{.*#+}} 
xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8643; SSE2-NEXT: psadbw %xmm0, %xmm1 8644; SSE2-NEXT: packuswb %xmm2, %xmm1 8645; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 8646; SSE2-NEXT: movdqa %xmm1, %xmm0 8647; SSE2-NEXT: retq 8648; 8649; SSE3-LABEL: ugt_9_v4i32: 8650; SSE3: # %bb.0: 8651; SSE3-NEXT: movdqa %xmm0, %xmm1 8652; SSE3-NEXT: psrlw $1, %xmm1 8653; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8654; SSE3-NEXT: psubb %xmm1, %xmm0 8655; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8656; SSE3-NEXT: movdqa %xmm0, %xmm2 8657; SSE3-NEXT: pand %xmm1, %xmm2 8658; SSE3-NEXT: psrlw $2, %xmm0 8659; SSE3-NEXT: pand %xmm1, %xmm0 8660; SSE3-NEXT: paddb %xmm2, %xmm0 8661; SSE3-NEXT: movdqa %xmm0, %xmm1 8662; SSE3-NEXT: psrlw $4, %xmm1 8663; SSE3-NEXT: paddb %xmm0, %xmm1 8664; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8665; SSE3-NEXT: pxor %xmm0, %xmm0 8666; SSE3-NEXT: movdqa %xmm1, %xmm2 8667; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8668; SSE3-NEXT: psadbw %xmm0, %xmm2 8669; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8670; SSE3-NEXT: psadbw %xmm0, %xmm1 8671; SSE3-NEXT: packuswb %xmm2, %xmm1 8672; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 8673; SSE3-NEXT: movdqa %xmm1, %xmm0 8674; SSE3-NEXT: retq 8675; 8676; SSSE3-LABEL: ugt_9_v4i32: 8677; SSSE3: # %bb.0: 8678; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8679; SSSE3-NEXT: movdqa %xmm0, %xmm3 8680; SSSE3-NEXT: pand %xmm2, %xmm3 8681; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8682; SSSE3-NEXT: movdqa %xmm1, %xmm4 8683; SSSE3-NEXT: pshufb %xmm3, %xmm4 8684; SSSE3-NEXT: psrlw $4, %xmm0 8685; SSSE3-NEXT: pand %xmm2, %xmm0 8686; SSSE3-NEXT: pshufb %xmm0, %xmm1 8687; SSSE3-NEXT: paddb %xmm4, %xmm1 8688; SSSE3-NEXT: pxor %xmm0, %xmm0 8689; SSSE3-NEXT: movdqa %xmm1, %xmm2 8690; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8691; SSSE3-NEXT: psadbw %xmm0, %xmm2 8692; 
SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8693; SSSE3-NEXT: psadbw %xmm0, %xmm1 8694; SSSE3-NEXT: packuswb %xmm2, %xmm1 8695; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 8696; SSSE3-NEXT: movdqa %xmm1, %xmm0 8697; SSSE3-NEXT: retq 8698; 8699; SSE41-LABEL: ugt_9_v4i32: 8700; SSE41: # %bb.0: 8701; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8702; SSE41-NEXT: movdqa %xmm0, %xmm2 8703; SSE41-NEXT: pand %xmm1, %xmm2 8704; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8705; SSE41-NEXT: movdqa %xmm3, %xmm4 8706; SSE41-NEXT: pshufb %xmm2, %xmm4 8707; SSE41-NEXT: psrlw $4, %xmm0 8708; SSE41-NEXT: pand %xmm1, %xmm0 8709; SSE41-NEXT: pshufb %xmm0, %xmm3 8710; SSE41-NEXT: paddb %xmm4, %xmm3 8711; SSE41-NEXT: pxor %xmm1, %xmm1 8712; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 8713; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 8714; SSE41-NEXT: psadbw %xmm1, %xmm3 8715; SSE41-NEXT: psadbw %xmm1, %xmm0 8716; SSE41-NEXT: packuswb %xmm3, %xmm0 8717; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 8718; SSE41-NEXT: retq 8719; 8720; AVX1-LABEL: ugt_9_v4i32: 8721; AVX1: # %bb.0: 8722; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8723; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 8724; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8725; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8726; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 8727; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 8728; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8729; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8730; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 8731; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8732; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8733; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8734; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8735; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8736; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, 
%xmm0 8737; AVX1-NEXT: retq 8738; 8739; AVX2-LABEL: ugt_9_v4i32: 8740; AVX2: # %bb.0: 8741; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8742; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 8743; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8744; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8745; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 8746; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 8747; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8748; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8749; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 8750; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8751; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8752; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8753; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8754; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8755; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] 8756; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8757; AVX2-NEXT: retq 8758; 8759; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i32: 8760; AVX512VPOPCNTDQ: # %bb.0: 8761; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8762; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 8763; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] 8764; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8765; AVX512VPOPCNTDQ-NEXT: vzeroupper 8766; AVX512VPOPCNTDQ-NEXT: retq 8767; 8768; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i32: 8769; AVX512VPOPCNTDQVL: # %bb.0: 8770; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 8771; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 8772; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8773; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8774; AVX512VPOPCNTDQVL-NEXT: retq 8775; 8776; BITALG_NOVLX-LABEL: ugt_9_v4i32: 8777; BITALG_NOVLX: # %bb.0: 8778; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8779; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 8780; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 8781; BITALG_NOVLX-NEXT: vpunpckhdq 
{{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8782; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8783; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8784; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8785; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8786; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] 8787; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 8788; BITALG_NOVLX-NEXT: vzeroupper 8789; BITALG_NOVLX-NEXT: retq 8790; 8791; BITALG-LABEL: ugt_9_v4i32: 8792; BITALG: # %bb.0: 8793; BITALG-NEXT: vpopcntb %xmm0, %xmm0 8794; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 8795; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8796; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8797; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8798; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8799; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8800; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 8801; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8802; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8803; BITALG-NEXT: retq 8804 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 8805 %3 = icmp ugt <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9> 8806 %4 = sext <4 x i1> %3 to <4 x i32> 8807 ret <4 x i32> %4 8808} 8809 8810define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { 8811; SSE2-LABEL: ult_10_v4i32: 8812; SSE2: # %bb.0: 8813; SSE2-NEXT: movdqa %xmm0, %xmm1 8814; SSE2-NEXT: psrlw $1, %xmm1 8815; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8816; SSE2-NEXT: psubb %xmm1, %xmm0 8817; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8818; SSE2-NEXT: movdqa %xmm0, %xmm2 8819; SSE2-NEXT: pand %xmm1, %xmm2 8820; SSE2-NEXT: psrlw $2, %xmm0 8821; SSE2-NEXT: pand %xmm1, %xmm0 8822; SSE2-NEXT: paddb %xmm2, %xmm0 8823; SSE2-NEXT: movdqa %xmm0, %xmm1 8824; SSE2-NEXT: psrlw $4, %xmm1 8825; SSE2-NEXT: paddb %xmm0, %xmm1 8826; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 8827; SSE2-NEXT: pxor %xmm0, %xmm0 8828; 
SSE2-NEXT: movdqa %xmm1, %xmm2 8829; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8830; SSE2-NEXT: psadbw %xmm0, %xmm2 8831; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8832; SSE2-NEXT: psadbw %xmm0, %xmm1 8833; SSE2-NEXT: packuswb %xmm2, %xmm1 8834; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] 8835; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 8836; SSE2-NEXT: retq 8837; 8838; SSE3-LABEL: ult_10_v4i32: 8839; SSE3: # %bb.0: 8840; SSE3-NEXT: movdqa %xmm0, %xmm1 8841; SSE3-NEXT: psrlw $1, %xmm1 8842; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8843; SSE3-NEXT: psubb %xmm1, %xmm0 8844; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 8845; SSE3-NEXT: movdqa %xmm0, %xmm2 8846; SSE3-NEXT: pand %xmm1, %xmm2 8847; SSE3-NEXT: psrlw $2, %xmm0 8848; SSE3-NEXT: pand %xmm1, %xmm0 8849; SSE3-NEXT: paddb %xmm2, %xmm0 8850; SSE3-NEXT: movdqa %xmm0, %xmm1 8851; SSE3-NEXT: psrlw $4, %xmm1 8852; SSE3-NEXT: paddb %xmm0, %xmm1 8853; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 8854; SSE3-NEXT: pxor %xmm0, %xmm0 8855; SSE3-NEXT: movdqa %xmm1, %xmm2 8856; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 8857; SSE3-NEXT: psadbw %xmm0, %xmm2 8858; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8859; SSE3-NEXT: psadbw %xmm0, %xmm1 8860; SSE3-NEXT: packuswb %xmm2, %xmm1 8861; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] 8862; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 8863; SSE3-NEXT: retq 8864; 8865; SSSE3-LABEL: ult_10_v4i32: 8866; SSSE3: # %bb.0: 8867; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8868; SSSE3-NEXT: movdqa %xmm0, %xmm2 8869; SSSE3-NEXT: pand %xmm1, %xmm2 8870; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8871; SSSE3-NEXT: movdqa %xmm3, %xmm4 8872; SSSE3-NEXT: pshufb %xmm2, %xmm4 8873; SSSE3-NEXT: psrlw $4, %xmm0 8874; SSSE3-NEXT: pand %xmm1, %xmm0 8875; SSSE3-NEXT: pshufb %xmm0, %xmm3 8876; SSSE3-NEXT: paddb %xmm4, %xmm3 
8877; SSSE3-NEXT: pxor %xmm0, %xmm0 8878; SSSE3-NEXT: movdqa %xmm3, %xmm1 8879; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 8880; SSSE3-NEXT: psadbw %xmm0, %xmm1 8881; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 8882; SSSE3-NEXT: psadbw %xmm0, %xmm3 8883; SSSE3-NEXT: packuswb %xmm1, %xmm3 8884; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] 8885; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 8886; SSSE3-NEXT: retq 8887; 8888; SSE41-LABEL: ult_10_v4i32: 8889; SSE41: # %bb.0: 8890; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8891; SSE41-NEXT: movdqa %xmm0, %xmm2 8892; SSE41-NEXT: pand %xmm1, %xmm2 8893; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8894; SSE41-NEXT: movdqa %xmm3, %xmm4 8895; SSE41-NEXT: pshufb %xmm2, %xmm4 8896; SSE41-NEXT: psrlw $4, %xmm0 8897; SSE41-NEXT: pand %xmm1, %xmm0 8898; SSE41-NEXT: pshufb %xmm0, %xmm3 8899; SSE41-NEXT: paddb %xmm4, %xmm3 8900; SSE41-NEXT: pxor %xmm0, %xmm0 8901; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 8902; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 8903; SSE41-NEXT: psadbw %xmm0, %xmm3 8904; SSE41-NEXT: psadbw %xmm0, %xmm1 8905; SSE41-NEXT: packuswb %xmm3, %xmm1 8906; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] 8907; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 8908; SSE41-NEXT: retq 8909; 8910; AVX1-LABEL: ult_10_v4i32: 8911; AVX1: # %bb.0: 8912; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8913; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 8914; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8915; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8916; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 8917; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 8918; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8919; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8920; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 8921; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 
8922; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8923; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8924; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8925; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8926; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10] 8927; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8928; AVX1-NEXT: retq 8929; 8930; AVX2-LABEL: ult_10_v4i32: 8931; AVX2: # %bb.0: 8932; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 8933; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 8934; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 8935; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 8936; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 8937; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 8938; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 8939; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 8940; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 8941; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8942; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8943; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8944; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8945; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8946; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] 8947; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8948; AVX2-NEXT: retq 8949; 8950; AVX512VPOPCNTDQ-LABEL: ult_10_v4i32: 8951; AVX512VPOPCNTDQ: # %bb.0: 8952; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8953; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 8954; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] 8955; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8956; AVX512VPOPCNTDQ-NEXT: vzeroupper 8957; AVX512VPOPCNTDQ-NEXT: retq 8958; 8959; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i32: 8960; AVX512VPOPCNTDQVL: # %bb.0: 8961; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 8962; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 8963; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8964; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 
8965; AVX512VPOPCNTDQVL-NEXT: retq 8966; 8967; BITALG_NOVLX-LABEL: ult_10_v4i32: 8968; BITALG_NOVLX: # %bb.0: 8969; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8970; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 8971; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 8972; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8973; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8974; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8975; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8976; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8977; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] 8978; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 8979; BITALG_NOVLX-NEXT: vzeroupper 8980; BITALG_NOVLX-NEXT: retq 8981; 8982; BITALG-LABEL: ult_10_v4i32: 8983; BITALG: # %bb.0: 8984; BITALG-NEXT: vpopcntb %xmm0, %xmm0 8985; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 8986; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 8987; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 8988; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 8989; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 8990; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 8991; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 8992; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 8993; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 8994; BITALG-NEXT: retq 8995 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 8996 %3 = icmp ult <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10> 8997 %4 = sext <4 x i1> %3 to <4 x i32> 8998 ret <4 x i32> %4 8999} 9000 9001define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { 9002; SSE2-LABEL: ugt_10_v4i32: 9003; SSE2: # %bb.0: 9004; SSE2-NEXT: movdqa %xmm0, %xmm1 9005; SSE2-NEXT: psrlw $1, %xmm1 9006; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9007; SSE2-NEXT: psubb %xmm1, %xmm0 9008; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9009; SSE2-NEXT: movdqa %xmm0, %xmm2 9010; 
SSE2-NEXT: pand %xmm1, %xmm2 9011; SSE2-NEXT: psrlw $2, %xmm0 9012; SSE2-NEXT: pand %xmm1, %xmm0 9013; SSE2-NEXT: paddb %xmm2, %xmm0 9014; SSE2-NEXT: movdqa %xmm0, %xmm1 9015; SSE2-NEXT: psrlw $4, %xmm1 9016; SSE2-NEXT: paddb %xmm0, %xmm1 9017; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9018; SSE2-NEXT: pxor %xmm0, %xmm0 9019; SSE2-NEXT: movdqa %xmm1, %xmm2 9020; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9021; SSE2-NEXT: psadbw %xmm0, %xmm2 9022; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9023; SSE2-NEXT: psadbw %xmm0, %xmm1 9024; SSE2-NEXT: packuswb %xmm2, %xmm1 9025; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9026; SSE2-NEXT: movdqa %xmm1, %xmm0 9027; SSE2-NEXT: retq 9028; 9029; SSE3-LABEL: ugt_10_v4i32: 9030; SSE3: # %bb.0: 9031; SSE3-NEXT: movdqa %xmm0, %xmm1 9032; SSE3-NEXT: psrlw $1, %xmm1 9033; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9034; SSE3-NEXT: psubb %xmm1, %xmm0 9035; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9036; SSE3-NEXT: movdqa %xmm0, %xmm2 9037; SSE3-NEXT: pand %xmm1, %xmm2 9038; SSE3-NEXT: psrlw $2, %xmm0 9039; SSE3-NEXT: pand %xmm1, %xmm0 9040; SSE3-NEXT: paddb %xmm2, %xmm0 9041; SSE3-NEXT: movdqa %xmm0, %xmm1 9042; SSE3-NEXT: psrlw $4, %xmm1 9043; SSE3-NEXT: paddb %xmm0, %xmm1 9044; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9045; SSE3-NEXT: pxor %xmm0, %xmm0 9046; SSE3-NEXT: movdqa %xmm1, %xmm2 9047; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9048; SSE3-NEXT: psadbw %xmm0, %xmm2 9049; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9050; SSE3-NEXT: psadbw %xmm0, %xmm1 9051; SSE3-NEXT: packuswb %xmm2, %xmm1 9052; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9053; SSE3-NEXT: movdqa %xmm1, %xmm0 9054; SSE3-NEXT: retq 9055; 9056; SSSE3-LABEL: ugt_10_v4i32: 9057; SSSE3: # %bb.0: 9058; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9059; SSSE3-NEXT: movdqa %xmm0, %xmm3 9060; SSSE3-NEXT: pand %xmm2, 
%xmm3 9061; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9062; SSSE3-NEXT: movdqa %xmm1, %xmm4 9063; SSSE3-NEXT: pshufb %xmm3, %xmm4 9064; SSSE3-NEXT: psrlw $4, %xmm0 9065; SSSE3-NEXT: pand %xmm2, %xmm0 9066; SSSE3-NEXT: pshufb %xmm0, %xmm1 9067; SSSE3-NEXT: paddb %xmm4, %xmm1 9068; SSSE3-NEXT: pxor %xmm0, %xmm0 9069; SSSE3-NEXT: movdqa %xmm1, %xmm2 9070; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9071; SSSE3-NEXT: psadbw %xmm0, %xmm2 9072; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9073; SSSE3-NEXT: psadbw %xmm0, %xmm1 9074; SSSE3-NEXT: packuswb %xmm2, %xmm1 9075; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9076; SSSE3-NEXT: movdqa %xmm1, %xmm0 9077; SSSE3-NEXT: retq 9078; 9079; SSE41-LABEL: ugt_10_v4i32: 9080; SSE41: # %bb.0: 9081; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9082; SSE41-NEXT: movdqa %xmm0, %xmm2 9083; SSE41-NEXT: pand %xmm1, %xmm2 9084; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9085; SSE41-NEXT: movdqa %xmm3, %xmm4 9086; SSE41-NEXT: pshufb %xmm2, %xmm4 9087; SSE41-NEXT: psrlw $4, %xmm0 9088; SSE41-NEXT: pand %xmm1, %xmm0 9089; SSE41-NEXT: pshufb %xmm0, %xmm3 9090; SSE41-NEXT: paddb %xmm4, %xmm3 9091; SSE41-NEXT: pxor %xmm1, %xmm1 9092; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 9093; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 9094; SSE41-NEXT: psadbw %xmm1, %xmm3 9095; SSE41-NEXT: psadbw %xmm1, %xmm0 9096; SSE41-NEXT: packuswb %xmm3, %xmm0 9097; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 9098; SSE41-NEXT: retq 9099; 9100; AVX1-LABEL: ugt_10_v4i32: 9101; AVX1: # %bb.0: 9102; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9103; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 9104; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9105; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9106; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 9107; 
AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 9108; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9109; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9110; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 9111; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9112; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9113; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9114; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9115; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9116; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 9117; AVX1-NEXT: retq 9118; 9119; AVX2-LABEL: ugt_10_v4i32: 9120; AVX2: # %bb.0: 9121; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9122; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 9123; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9124; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9125; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 9126; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 9127; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9128; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9129; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 9130; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9131; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9132; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9133; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9134; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9135; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] 9136; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9137; AVX2-NEXT: retq 9138; 9139; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i32: 9140; AVX512VPOPCNTDQ: # %bb.0: 9141; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9142; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 9143; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] 9144; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9145; AVX512VPOPCNTDQ-NEXT: vzeroupper 9146; AVX512VPOPCNTDQ-NEXT: retq 9147; 9148; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i32: 9149; AVX512VPOPCNTDQVL: # %bb.0: 9150; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, 
%xmm0 9151; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 9152; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9153; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9154; AVX512VPOPCNTDQVL-NEXT: retq 9155; 9156; BITALG_NOVLX-LABEL: ugt_10_v4i32: 9157; BITALG_NOVLX: # %bb.0: 9158; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9159; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 9160; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 9161; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9162; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9163; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9164; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9165; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9166; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] 9167; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9168; BITALG_NOVLX-NEXT: vzeroupper 9169; BITALG_NOVLX-NEXT: retq 9170; 9171; BITALG-LABEL: ugt_10_v4i32: 9172; BITALG: # %bb.0: 9173; BITALG-NEXT: vpopcntb %xmm0, %xmm0 9174; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 9175; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9176; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9177; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9178; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9179; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9180; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 9181; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9182; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9183; BITALG-NEXT: retq 9184 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 9185 %3 = icmp ugt <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10> 9186 %4 = sext <4 x i1> %3 to <4 x i32> 9187 ret <4 x i32> %4 9188} 9189 9190define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { 9191; SSE2-LABEL: ult_11_v4i32: 9192; SSE2: # %bb.0: 9193; SSE2-NEXT: movdqa %xmm0, %xmm1 9194; SSE2-NEXT: psrlw $1, %xmm1 9195; 
SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9196; SSE2-NEXT: psubb %xmm1, %xmm0 9197; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9198; SSE2-NEXT: movdqa %xmm0, %xmm2 9199; SSE2-NEXT: pand %xmm1, %xmm2 9200; SSE2-NEXT: psrlw $2, %xmm0 9201; SSE2-NEXT: pand %xmm1, %xmm0 9202; SSE2-NEXT: paddb %xmm2, %xmm0 9203; SSE2-NEXT: movdqa %xmm0, %xmm1 9204; SSE2-NEXT: psrlw $4, %xmm1 9205; SSE2-NEXT: paddb %xmm0, %xmm1 9206; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9207; SSE2-NEXT: pxor %xmm0, %xmm0 9208; SSE2-NEXT: movdqa %xmm1, %xmm2 9209; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9210; SSE2-NEXT: psadbw %xmm0, %xmm2 9211; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9212; SSE2-NEXT: psadbw %xmm0, %xmm1 9213; SSE2-NEXT: packuswb %xmm2, %xmm1 9214; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] 9215; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 9216; SSE2-NEXT: retq 9217; 9218; SSE3-LABEL: ult_11_v4i32: 9219; SSE3: # %bb.0: 9220; SSE3-NEXT: movdqa %xmm0, %xmm1 9221; SSE3-NEXT: psrlw $1, %xmm1 9222; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9223; SSE3-NEXT: psubb %xmm1, %xmm0 9224; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9225; SSE3-NEXT: movdqa %xmm0, %xmm2 9226; SSE3-NEXT: pand %xmm1, %xmm2 9227; SSE3-NEXT: psrlw $2, %xmm0 9228; SSE3-NEXT: pand %xmm1, %xmm0 9229; SSE3-NEXT: paddb %xmm2, %xmm0 9230; SSE3-NEXT: movdqa %xmm0, %xmm1 9231; SSE3-NEXT: psrlw $4, %xmm1 9232; SSE3-NEXT: paddb %xmm0, %xmm1 9233; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9234; SSE3-NEXT: pxor %xmm0, %xmm0 9235; SSE3-NEXT: movdqa %xmm1, %xmm2 9236; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9237; SSE3-NEXT: psadbw %xmm0, %xmm2 9238; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9239; SSE3-NEXT: psadbw %xmm0, %xmm1 9240; SSE3-NEXT: packuswb %xmm2, %xmm1 9241; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] 9242; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 9243; SSE3-NEXT: 
retq 9244; 9245; SSSE3-LABEL: ult_11_v4i32: 9246; SSSE3: # %bb.0: 9247; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9248; SSSE3-NEXT: movdqa %xmm0, %xmm2 9249; SSSE3-NEXT: pand %xmm1, %xmm2 9250; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9251; SSSE3-NEXT: movdqa %xmm3, %xmm4 9252; SSSE3-NEXT: pshufb %xmm2, %xmm4 9253; SSSE3-NEXT: psrlw $4, %xmm0 9254; SSSE3-NEXT: pand %xmm1, %xmm0 9255; SSSE3-NEXT: pshufb %xmm0, %xmm3 9256; SSSE3-NEXT: paddb %xmm4, %xmm3 9257; SSSE3-NEXT: pxor %xmm0, %xmm0 9258; SSSE3-NEXT: movdqa %xmm3, %xmm1 9259; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 9260; SSSE3-NEXT: psadbw %xmm0, %xmm1 9261; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 9262; SSSE3-NEXT: psadbw %xmm0, %xmm3 9263; SSSE3-NEXT: packuswb %xmm1, %xmm3 9264; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] 9265; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 9266; SSSE3-NEXT: retq 9267; 9268; SSE41-LABEL: ult_11_v4i32: 9269; SSE41: # %bb.0: 9270; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9271; SSE41-NEXT: movdqa %xmm0, %xmm2 9272; SSE41-NEXT: pand %xmm1, %xmm2 9273; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9274; SSE41-NEXT: movdqa %xmm3, %xmm4 9275; SSE41-NEXT: pshufb %xmm2, %xmm4 9276; SSE41-NEXT: psrlw $4, %xmm0 9277; SSE41-NEXT: pand %xmm1, %xmm0 9278; SSE41-NEXT: pshufb %xmm0, %xmm3 9279; SSE41-NEXT: paddb %xmm4, %xmm3 9280; SSE41-NEXT: pxor %xmm0, %xmm0 9281; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 9282; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 9283; SSE41-NEXT: psadbw %xmm0, %xmm3 9284; SSE41-NEXT: psadbw %xmm0, %xmm1 9285; SSE41-NEXT: packuswb %xmm3, %xmm1 9286; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] 9287; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 9288; SSE41-NEXT: retq 9289; 9290; AVX1-LABEL: ult_11_v4i32: 9291; AVX1: # %bb.0: 9292; AVX1-NEXT: 
vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9293; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 9294; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9295; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9296; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 9297; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 9298; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9299; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9300; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 9301; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9302; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9303; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9304; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9305; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9306; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11] 9307; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9308; AVX1-NEXT: retq 9309; 9310; AVX2-LABEL: ult_11_v4i32: 9311; AVX2: # %bb.0: 9312; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9313; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 9314; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9315; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9316; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 9317; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 9318; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9319; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9320; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 9321; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9322; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9323; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9324; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9325; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9326; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] 9327; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9328; AVX2-NEXT: retq 9329; 9330; AVX512VPOPCNTDQ-LABEL: ult_11_v4i32: 9331; AVX512VPOPCNTDQ: # %bb.0: 9332; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9333; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 
9334; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] 9335; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9336; AVX512VPOPCNTDQ-NEXT: vzeroupper 9337; AVX512VPOPCNTDQ-NEXT: retq 9338; 9339; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i32: 9340; AVX512VPOPCNTDQVL: # %bb.0: 9341; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 9342; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 9343; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9344; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9345; AVX512VPOPCNTDQVL-NEXT: retq 9346; 9347; BITALG_NOVLX-LABEL: ult_11_v4i32: 9348; BITALG_NOVLX: # %bb.0: 9349; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9350; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 9351; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 9352; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9353; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9354; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9355; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9356; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9357; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] 9358; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9359; BITALG_NOVLX-NEXT: vzeroupper 9360; BITALG_NOVLX-NEXT: retq 9361; 9362; BITALG-LABEL: ult_11_v4i32: 9363; BITALG: # %bb.0: 9364; BITALG-NEXT: vpopcntb %xmm0, %xmm0 9365; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 9366; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9367; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9368; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9369; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9370; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9371; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 9372; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9373; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9374; BITALG-NEXT: retq 9375 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> 
%0) 9376 %3 = icmp ult <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11> 9377 %4 = sext <4 x i1> %3 to <4 x i32> 9378 ret <4 x i32> %4 9379} 9380 9381define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { 9382; SSE2-LABEL: ugt_11_v4i32: 9383; SSE2: # %bb.0: 9384; SSE2-NEXT: movdqa %xmm0, %xmm1 9385; SSE2-NEXT: psrlw $1, %xmm1 9386; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9387; SSE2-NEXT: psubb %xmm1, %xmm0 9388; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9389; SSE2-NEXT: movdqa %xmm0, %xmm2 9390; SSE2-NEXT: pand %xmm1, %xmm2 9391; SSE2-NEXT: psrlw $2, %xmm0 9392; SSE2-NEXT: pand %xmm1, %xmm0 9393; SSE2-NEXT: paddb %xmm2, %xmm0 9394; SSE2-NEXT: movdqa %xmm0, %xmm1 9395; SSE2-NEXT: psrlw $4, %xmm1 9396; SSE2-NEXT: paddb %xmm0, %xmm1 9397; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9398; SSE2-NEXT: pxor %xmm0, %xmm0 9399; SSE2-NEXT: movdqa %xmm1, %xmm2 9400; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9401; SSE2-NEXT: psadbw %xmm0, %xmm2 9402; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9403; SSE2-NEXT: psadbw %xmm0, %xmm1 9404; SSE2-NEXT: packuswb %xmm2, %xmm1 9405; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9406; SSE2-NEXT: movdqa %xmm1, %xmm0 9407; SSE2-NEXT: retq 9408; 9409; SSE3-LABEL: ugt_11_v4i32: 9410; SSE3: # %bb.0: 9411; SSE3-NEXT: movdqa %xmm0, %xmm1 9412; SSE3-NEXT: psrlw $1, %xmm1 9413; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9414; SSE3-NEXT: psubb %xmm1, %xmm0 9415; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9416; SSE3-NEXT: movdqa %xmm0, %xmm2 9417; SSE3-NEXT: pand %xmm1, %xmm2 9418; SSE3-NEXT: psrlw $2, %xmm0 9419; SSE3-NEXT: pand %xmm1, %xmm0 9420; SSE3-NEXT: paddb %xmm2, %xmm0 9421; SSE3-NEXT: movdqa %xmm0, %xmm1 9422; SSE3-NEXT: psrlw $4, %xmm1 9423; SSE3-NEXT: paddb %xmm0, %xmm1 9424; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9425; SSE3-NEXT: pxor %xmm0, %xmm0 9426; SSE3-NEXT: movdqa %xmm1, %xmm2 9427; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = 
xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9428; SSE3-NEXT: psadbw %xmm0, %xmm2 9429; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9430; SSE3-NEXT: psadbw %xmm0, %xmm1 9431; SSE3-NEXT: packuswb %xmm2, %xmm1 9432; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9433; SSE3-NEXT: movdqa %xmm1, %xmm0 9434; SSE3-NEXT: retq 9435; 9436; SSSE3-LABEL: ugt_11_v4i32: 9437; SSSE3: # %bb.0: 9438; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9439; SSSE3-NEXT: movdqa %xmm0, %xmm3 9440; SSSE3-NEXT: pand %xmm2, %xmm3 9441; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9442; SSSE3-NEXT: movdqa %xmm1, %xmm4 9443; SSSE3-NEXT: pshufb %xmm3, %xmm4 9444; SSSE3-NEXT: psrlw $4, %xmm0 9445; SSSE3-NEXT: pand %xmm2, %xmm0 9446; SSSE3-NEXT: pshufb %xmm0, %xmm1 9447; SSSE3-NEXT: paddb %xmm4, %xmm1 9448; SSSE3-NEXT: pxor %xmm0, %xmm0 9449; SSSE3-NEXT: movdqa %xmm1, %xmm2 9450; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9451; SSSE3-NEXT: psadbw %xmm0, %xmm2 9452; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9453; SSSE3-NEXT: psadbw %xmm0, %xmm1 9454; SSSE3-NEXT: packuswb %xmm2, %xmm1 9455; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9456; SSSE3-NEXT: movdqa %xmm1, %xmm0 9457; SSSE3-NEXT: retq 9458; 9459; SSE41-LABEL: ugt_11_v4i32: 9460; SSE41: # %bb.0: 9461; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9462; SSE41-NEXT: movdqa %xmm0, %xmm2 9463; SSE41-NEXT: pand %xmm1, %xmm2 9464; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9465; SSE41-NEXT: movdqa %xmm3, %xmm4 9466; SSE41-NEXT: pshufb %xmm2, %xmm4 9467; SSE41-NEXT: psrlw $4, %xmm0 9468; SSE41-NEXT: pand %xmm1, %xmm0 9469; SSE41-NEXT: pshufb %xmm0, %xmm3 9470; SSE41-NEXT: paddb %xmm4, %xmm3 9471; SSE41-NEXT: pxor %xmm1, %xmm1 9472; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 9473; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 
9474; SSE41-NEXT: psadbw %xmm1, %xmm3 9475; SSE41-NEXT: psadbw %xmm1, %xmm0 9476; SSE41-NEXT: packuswb %xmm3, %xmm0 9477; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 9478; SSE41-NEXT: retq 9479; 9480; AVX1-LABEL: ugt_11_v4i32: 9481; AVX1: # %bb.0: 9482; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9483; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 9484; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9485; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9486; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 9487; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 9488; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9489; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9490; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 9491; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9492; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9493; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9494; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9495; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9496; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 9497; AVX1-NEXT: retq 9498; 9499; AVX2-LABEL: ugt_11_v4i32: 9500; AVX2: # %bb.0: 9501; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9502; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 9503; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9504; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9505; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 9506; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 9507; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9508; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9509; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 9510; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9511; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9512; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9513; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9514; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9515; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] 9516; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9517; 
AVX2-NEXT: retq 9518; 9519; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i32: 9520; AVX512VPOPCNTDQ: # %bb.0: 9521; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9522; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 9523; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] 9524; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9525; AVX512VPOPCNTDQ-NEXT: vzeroupper 9526; AVX512VPOPCNTDQ-NEXT: retq 9527; 9528; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i32: 9529; AVX512VPOPCNTDQVL: # %bb.0: 9530; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 9531; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 9532; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9533; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9534; AVX512VPOPCNTDQVL-NEXT: retq 9535; 9536; BITALG_NOVLX-LABEL: ugt_11_v4i32: 9537; BITALG_NOVLX: # %bb.0: 9538; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9539; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 9540; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 9541; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9542; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9543; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9544; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9545; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9546; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] 9547; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9548; BITALG_NOVLX-NEXT: vzeroupper 9549; BITALG_NOVLX-NEXT: retq 9550; 9551; BITALG-LABEL: ugt_11_v4i32: 9552; BITALG: # %bb.0: 9553; BITALG-NEXT: vpopcntb %xmm0, %xmm0 9554; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 9555; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9556; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9557; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9558; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9559; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9560; BITALG-NEXT: 
vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 9561; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9562; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9563; BITALG-NEXT: retq 9564 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 9565 %3 = icmp ugt <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11> 9566 %4 = sext <4 x i1> %3 to <4 x i32> 9567 ret <4 x i32> %4 9568} 9569 9570define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { 9571; SSE2-LABEL: ult_12_v4i32: 9572; SSE2: # %bb.0: 9573; SSE2-NEXT: movdqa %xmm0, %xmm1 9574; SSE2-NEXT: psrlw $1, %xmm1 9575; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9576; SSE2-NEXT: psubb %xmm1, %xmm0 9577; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9578; SSE2-NEXT: movdqa %xmm0, %xmm2 9579; SSE2-NEXT: pand %xmm1, %xmm2 9580; SSE2-NEXT: psrlw $2, %xmm0 9581; SSE2-NEXT: pand %xmm1, %xmm0 9582; SSE2-NEXT: paddb %xmm2, %xmm0 9583; SSE2-NEXT: movdqa %xmm0, %xmm1 9584; SSE2-NEXT: psrlw $4, %xmm1 9585; SSE2-NEXT: paddb %xmm0, %xmm1 9586; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9587; SSE2-NEXT: pxor %xmm0, %xmm0 9588; SSE2-NEXT: movdqa %xmm1, %xmm2 9589; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9590; SSE2-NEXT: psadbw %xmm0, %xmm2 9591; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9592; SSE2-NEXT: psadbw %xmm0, %xmm1 9593; SSE2-NEXT: packuswb %xmm2, %xmm1 9594; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] 9595; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 9596; SSE2-NEXT: retq 9597; 9598; SSE3-LABEL: ult_12_v4i32: 9599; SSE3: # %bb.0: 9600; SSE3-NEXT: movdqa %xmm0, %xmm1 9601; SSE3-NEXT: psrlw $1, %xmm1 9602; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9603; SSE3-NEXT: psubb %xmm1, %xmm0 9604; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9605; SSE3-NEXT: movdqa %xmm0, %xmm2 9606; SSE3-NEXT: pand %xmm1, %xmm2 9607; SSE3-NEXT: psrlw $2, %xmm0 9608; SSE3-NEXT: pand %xmm1, %xmm0 9609; SSE3-NEXT: paddb %xmm2, %xmm0 9610; SSE3-NEXT: movdqa %xmm0, %xmm1 9611; 
SSE3-NEXT: psrlw $4, %xmm1 9612; SSE3-NEXT: paddb %xmm0, %xmm1 9613; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9614; SSE3-NEXT: pxor %xmm0, %xmm0 9615; SSE3-NEXT: movdqa %xmm1, %xmm2 9616; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9617; SSE3-NEXT: psadbw %xmm0, %xmm2 9618; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9619; SSE3-NEXT: psadbw %xmm0, %xmm1 9620; SSE3-NEXT: packuswb %xmm2, %xmm1 9621; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] 9622; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 9623; SSE3-NEXT: retq 9624; 9625; SSSE3-LABEL: ult_12_v4i32: 9626; SSSE3: # %bb.0: 9627; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9628; SSSE3-NEXT: movdqa %xmm0, %xmm2 9629; SSSE3-NEXT: pand %xmm1, %xmm2 9630; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9631; SSSE3-NEXT: movdqa %xmm3, %xmm4 9632; SSSE3-NEXT: pshufb %xmm2, %xmm4 9633; SSSE3-NEXT: psrlw $4, %xmm0 9634; SSSE3-NEXT: pand %xmm1, %xmm0 9635; SSSE3-NEXT: pshufb %xmm0, %xmm3 9636; SSSE3-NEXT: paddb %xmm4, %xmm3 9637; SSSE3-NEXT: pxor %xmm0, %xmm0 9638; SSSE3-NEXT: movdqa %xmm3, %xmm1 9639; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 9640; SSSE3-NEXT: psadbw %xmm0, %xmm1 9641; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 9642; SSSE3-NEXT: psadbw %xmm0, %xmm3 9643; SSSE3-NEXT: packuswb %xmm1, %xmm3 9644; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] 9645; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 9646; SSSE3-NEXT: retq 9647; 9648; SSE41-LABEL: ult_12_v4i32: 9649; SSE41: # %bb.0: 9650; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9651; SSE41-NEXT: movdqa %xmm0, %xmm2 9652; SSE41-NEXT: pand %xmm1, %xmm2 9653; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9654; SSE41-NEXT: movdqa %xmm3, %xmm4 9655; SSE41-NEXT: pshufb %xmm2, %xmm4 9656; SSE41-NEXT: psrlw $4, %xmm0 9657; SSE41-NEXT: pand %xmm1, %xmm0 9658; SSE41-NEXT: 
pshufb %xmm0, %xmm3 9659; SSE41-NEXT: paddb %xmm4, %xmm3 9660; SSE41-NEXT: pxor %xmm0, %xmm0 9661; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 9662; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 9663; SSE41-NEXT: psadbw %xmm0, %xmm3 9664; SSE41-NEXT: psadbw %xmm0, %xmm1 9665; SSE41-NEXT: packuswb %xmm3, %xmm1 9666; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] 9667; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 9668; SSE41-NEXT: retq 9669; 9670; AVX1-LABEL: ult_12_v4i32: 9671; AVX1: # %bb.0: 9672; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9673; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 9674; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9675; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9676; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 9677; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 9678; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9679; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9680; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 9681; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9682; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9683; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9684; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9685; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9686; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12] 9687; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9688; AVX1-NEXT: retq 9689; 9690; AVX2-LABEL: ult_12_v4i32: 9691; AVX2: # %bb.0: 9692; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9693; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 9694; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9695; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9696; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 9697; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 9698; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9699; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9700; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 9701; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9702; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9703; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9704; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9705; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9706; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] 9707; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9708; AVX2-NEXT: retq 9709; 9710; AVX512VPOPCNTDQ-LABEL: ult_12_v4i32: 9711; AVX512VPOPCNTDQ: # %bb.0: 9712; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9713; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 9714; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] 9715; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9716; AVX512VPOPCNTDQ-NEXT: vzeroupper 9717; AVX512VPOPCNTDQ-NEXT: retq 9718; 9719; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i32: 9720; AVX512VPOPCNTDQVL: # %bb.0: 9721; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 9722; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 9723; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9724; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9725; AVX512VPOPCNTDQVL-NEXT: retq 9726; 9727; BITALG_NOVLX-LABEL: ult_12_v4i32: 9728; BITALG_NOVLX: # %bb.0: 9729; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9730; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 9731; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 9732; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9733; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9734; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9735; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9736; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9737; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] 9738; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 9739; BITALG_NOVLX-NEXT: vzeroupper 9740; BITALG_NOVLX-NEXT: retq 9741; 9742; BITALG-LABEL: ult_12_v4i32: 9743; BITALG: # %bb.0: 9744; BITALG-NEXT: vpopcntb %xmm0, %xmm0 9745; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 9746; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9747; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9748; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9749; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9750; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9751; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 9752; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9753; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9754; BITALG-NEXT: retq 9755 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 9756 %3 = icmp ult <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12> 9757 %4 = sext <4 x i1> %3 to <4 x i32> 9758 ret <4 x i32> %4 9759} 9760 9761define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { 9762; SSE2-LABEL: ugt_12_v4i32: 9763; SSE2: # %bb.0: 9764; SSE2-NEXT: movdqa %xmm0, %xmm1 9765; SSE2-NEXT: psrlw $1, %xmm1 9766; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9767; SSE2-NEXT: psubb %xmm1, %xmm0 9768; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9769; SSE2-NEXT: movdqa %xmm0, %xmm2 9770; SSE2-NEXT: pand %xmm1, %xmm2 9771; SSE2-NEXT: psrlw $2, %xmm0 9772; SSE2-NEXT: pand %xmm1, %xmm0 9773; SSE2-NEXT: paddb %xmm2, %xmm0 9774; SSE2-NEXT: movdqa %xmm0, %xmm1 9775; SSE2-NEXT: psrlw $4, %xmm1 9776; SSE2-NEXT: paddb %xmm0, %xmm1 9777; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9778; SSE2-NEXT: pxor %xmm0, %xmm0 9779; SSE2-NEXT: movdqa %xmm1, %xmm2 9780; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9781; SSE2-NEXT: psadbw %xmm0, %xmm2 9782; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9783; SSE2-NEXT: psadbw %xmm0, %xmm1 9784; SSE2-NEXT: packuswb %xmm2, %xmm1 9785; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9786; SSE2-NEXT: movdqa %xmm1, %xmm0 9787; SSE2-NEXT: retq 9788; 9789; SSE3-LABEL: ugt_12_v4i32: 9790; SSE3: # %bb.0: 9791; SSE3-NEXT: movdqa %xmm0, %xmm1 9792; SSE3-NEXT: psrlw $1, %xmm1 9793; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9794; 
SSE3-NEXT: psubb %xmm1, %xmm0 9795; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9796; SSE3-NEXT: movdqa %xmm0, %xmm2 9797; SSE3-NEXT: pand %xmm1, %xmm2 9798; SSE3-NEXT: psrlw $2, %xmm0 9799; SSE3-NEXT: pand %xmm1, %xmm0 9800; SSE3-NEXT: paddb %xmm2, %xmm0 9801; SSE3-NEXT: movdqa %xmm0, %xmm1 9802; SSE3-NEXT: psrlw $4, %xmm1 9803; SSE3-NEXT: paddb %xmm0, %xmm1 9804; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9805; SSE3-NEXT: pxor %xmm0, %xmm0 9806; SSE3-NEXT: movdqa %xmm1, %xmm2 9807; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9808; SSE3-NEXT: psadbw %xmm0, %xmm2 9809; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9810; SSE3-NEXT: psadbw %xmm0, %xmm1 9811; SSE3-NEXT: packuswb %xmm2, %xmm1 9812; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9813; SSE3-NEXT: movdqa %xmm1, %xmm0 9814; SSE3-NEXT: retq 9815; 9816; SSSE3-LABEL: ugt_12_v4i32: 9817; SSSE3: # %bb.0: 9818; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9819; SSSE3-NEXT: movdqa %xmm0, %xmm3 9820; SSSE3-NEXT: pand %xmm2, %xmm3 9821; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9822; SSSE3-NEXT: movdqa %xmm1, %xmm4 9823; SSSE3-NEXT: pshufb %xmm3, %xmm4 9824; SSSE3-NEXT: psrlw $4, %xmm0 9825; SSSE3-NEXT: pand %xmm2, %xmm0 9826; SSSE3-NEXT: pshufb %xmm0, %xmm1 9827; SSSE3-NEXT: paddb %xmm4, %xmm1 9828; SSSE3-NEXT: pxor %xmm0, %xmm0 9829; SSSE3-NEXT: movdqa %xmm1, %xmm2 9830; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9831; SSSE3-NEXT: psadbw %xmm0, %xmm2 9832; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9833; SSSE3-NEXT: psadbw %xmm0, %xmm1 9834; SSSE3-NEXT: packuswb %xmm2, %xmm1 9835; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 9836; SSSE3-NEXT: movdqa %xmm1, %xmm0 9837; SSSE3-NEXT: retq 9838; 9839; SSE41-LABEL: ugt_12_v4i32: 9840; SSE41: # %bb.0: 9841; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9842; SSE41-NEXT: movdqa %xmm0, %xmm2 9843; SSE41-NEXT: pand %xmm1, %xmm2 9844; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9845; SSE41-NEXT: movdqa %xmm3, %xmm4 9846; SSE41-NEXT: pshufb %xmm2, %xmm4 9847; SSE41-NEXT: psrlw $4, %xmm0 9848; SSE41-NEXT: pand %xmm1, %xmm0 9849; SSE41-NEXT: pshufb %xmm0, %xmm3 9850; SSE41-NEXT: paddb %xmm4, %xmm3 9851; SSE41-NEXT: pxor %xmm1, %xmm1 9852; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 9853; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 9854; SSE41-NEXT: psadbw %xmm1, %xmm3 9855; SSE41-NEXT: psadbw %xmm1, %xmm0 9856; SSE41-NEXT: packuswb %xmm3, %xmm0 9857; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 9858; SSE41-NEXT: retq 9859; 9860; AVX1-LABEL: ugt_12_v4i32: 9861; AVX1: # %bb.0: 9862; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9863; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 9864; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9865; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9866; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 9867; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 9868; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9869; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9870; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 9871; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9872; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9873; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9874; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9875; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9876; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 9877; AVX1-NEXT: retq 9878; 9879; AVX2-LABEL: ugt_12_v4i32: 9880; AVX2: # %bb.0: 9881; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 9882; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 9883; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 9884; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 9885; 
AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 9886; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 9887; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 9888; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 9889; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 9890; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9891; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9892; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9893; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9894; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9895; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] 9896; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9897; AVX2-NEXT: retq 9898; 9899; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i32: 9900; AVX512VPOPCNTDQ: # %bb.0: 9901; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9902; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 9903; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] 9904; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9905; AVX512VPOPCNTDQ-NEXT: vzeroupper 9906; AVX512VPOPCNTDQ-NEXT: retq 9907; 9908; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i32: 9909; AVX512VPOPCNTDQVL: # %bb.0: 9910; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 9911; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 9912; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9913; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9914; AVX512VPOPCNTDQVL-NEXT: retq 9915; 9916; BITALG_NOVLX-LABEL: ugt_12_v4i32: 9917; BITALG_NOVLX: # %bb.0: 9918; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 9919; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 9920; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 9921; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9922; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9923; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9924; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9925; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9926; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 
= [12,12,12,12] 9927; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 9928; BITALG_NOVLX-NEXT: vzeroupper 9929; BITALG_NOVLX-NEXT: retq 9930; 9931; BITALG-LABEL: ugt_12_v4i32: 9932; BITALG: # %bb.0: 9933; BITALG-NEXT: vpopcntb %xmm0, %xmm0 9934; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 9935; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 9936; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 9937; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 9938; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 9939; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 9940; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 9941; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 9942; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 9943; BITALG-NEXT: retq 9944 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 9945 %3 = icmp ugt <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12> 9946 %4 = sext <4 x i1> %3 to <4 x i32> 9947 ret <4 x i32> %4 9948} 9949 9950define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { 9951; SSE2-LABEL: ult_13_v4i32: 9952; SSE2: # %bb.0: 9953; SSE2-NEXT: movdqa %xmm0, %xmm1 9954; SSE2-NEXT: psrlw $1, %xmm1 9955; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9956; SSE2-NEXT: psubb %xmm1, %xmm0 9957; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9958; SSE2-NEXT: movdqa %xmm0, %xmm2 9959; SSE2-NEXT: pand %xmm1, %xmm2 9960; SSE2-NEXT: psrlw $2, %xmm0 9961; SSE2-NEXT: pand %xmm1, %xmm0 9962; SSE2-NEXT: paddb %xmm2, %xmm0 9963; SSE2-NEXT: movdqa %xmm0, %xmm1 9964; SSE2-NEXT: psrlw $4, %xmm1 9965; SSE2-NEXT: paddb %xmm0, %xmm1 9966; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 9967; SSE2-NEXT: pxor %xmm0, %xmm0 9968; SSE2-NEXT: movdqa %xmm1, %xmm2 9969; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9970; SSE2-NEXT: psadbw %xmm0, %xmm2 9971; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9972; SSE2-NEXT: psadbw %xmm0, %xmm1 9973; SSE2-NEXT: packuswb %xmm2, %xmm1 9974; SSE2-NEXT: movdqa {{.*#+}} 
xmm0 = [13,13,13,13] 9975; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 9976; SSE2-NEXT: retq 9977; 9978; SSE3-LABEL: ult_13_v4i32: 9979; SSE3: # %bb.0: 9980; SSE3-NEXT: movdqa %xmm0, %xmm1 9981; SSE3-NEXT: psrlw $1, %xmm1 9982; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9983; SSE3-NEXT: psubb %xmm1, %xmm0 9984; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 9985; SSE3-NEXT: movdqa %xmm0, %xmm2 9986; SSE3-NEXT: pand %xmm1, %xmm2 9987; SSE3-NEXT: psrlw $2, %xmm0 9988; SSE3-NEXT: pand %xmm1, %xmm0 9989; SSE3-NEXT: paddb %xmm2, %xmm0 9990; SSE3-NEXT: movdqa %xmm0, %xmm1 9991; SSE3-NEXT: psrlw $4, %xmm1 9992; SSE3-NEXT: paddb %xmm0, %xmm1 9993; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 9994; SSE3-NEXT: pxor %xmm0, %xmm0 9995; SSE3-NEXT: movdqa %xmm1, %xmm2 9996; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 9997; SSE3-NEXT: psadbw %xmm0, %xmm2 9998; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9999; SSE3-NEXT: psadbw %xmm0, %xmm1 10000; SSE3-NEXT: packuswb %xmm2, %xmm1 10001; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] 10002; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 10003; SSE3-NEXT: retq 10004; 10005; SSSE3-LABEL: ult_13_v4i32: 10006; SSSE3: # %bb.0: 10007; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10008; SSSE3-NEXT: movdqa %xmm0, %xmm2 10009; SSSE3-NEXT: pand %xmm1, %xmm2 10010; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10011; SSSE3-NEXT: movdqa %xmm3, %xmm4 10012; SSSE3-NEXT: pshufb %xmm2, %xmm4 10013; SSSE3-NEXT: psrlw $4, %xmm0 10014; SSSE3-NEXT: pand %xmm1, %xmm0 10015; SSSE3-NEXT: pshufb %xmm0, %xmm3 10016; SSSE3-NEXT: paddb %xmm4, %xmm3 10017; SSSE3-NEXT: pxor %xmm0, %xmm0 10018; SSSE3-NEXT: movdqa %xmm3, %xmm1 10019; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 10020; SSSE3-NEXT: psadbw %xmm0, %xmm1 10021; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 10022; SSSE3-NEXT: psadbw %xmm0, %xmm3 
10023; SSSE3-NEXT: packuswb %xmm1, %xmm3 10024; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] 10025; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 10026; SSSE3-NEXT: retq 10027; 10028; SSE41-LABEL: ult_13_v4i32: 10029; SSE41: # %bb.0: 10030; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10031; SSE41-NEXT: movdqa %xmm0, %xmm2 10032; SSE41-NEXT: pand %xmm1, %xmm2 10033; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10034; SSE41-NEXT: movdqa %xmm3, %xmm4 10035; SSE41-NEXT: pshufb %xmm2, %xmm4 10036; SSE41-NEXT: psrlw $4, %xmm0 10037; SSE41-NEXT: pand %xmm1, %xmm0 10038; SSE41-NEXT: pshufb %xmm0, %xmm3 10039; SSE41-NEXT: paddb %xmm4, %xmm3 10040; SSE41-NEXT: pxor %xmm0, %xmm0 10041; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 10042; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 10043; SSE41-NEXT: psadbw %xmm0, %xmm3 10044; SSE41-NEXT: psadbw %xmm0, %xmm1 10045; SSE41-NEXT: packuswb %xmm3, %xmm1 10046; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] 10047; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 10048; SSE41-NEXT: retq 10049; 10050; AVX1-LABEL: ult_13_v4i32: 10051; AVX1: # %bb.0: 10052; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10053; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 10054; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10055; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10056; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 10057; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 10058; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10059; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10060; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 10061; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10062; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10063; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10064; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10065; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10066; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[13,13,13,13] 10067; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10068; AVX1-NEXT: retq 10069; 10070; AVX2-LABEL: ult_13_v4i32: 10071; AVX2: # %bb.0: 10072; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10073; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 10074; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10075; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10076; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 10077; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 10078; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10079; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10080; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 10081; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10082; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10083; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10084; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10085; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10086; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] 10087; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10088; AVX2-NEXT: retq 10089; 10090; AVX512VPOPCNTDQ-LABEL: ult_13_v4i32: 10091; AVX512VPOPCNTDQ: # %bb.0: 10092; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10093; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 10094; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] 10095; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10096; AVX512VPOPCNTDQ-NEXT: vzeroupper 10097; AVX512VPOPCNTDQ-NEXT: retq 10098; 10099; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i32: 10100; AVX512VPOPCNTDQVL: # %bb.0: 10101; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 10102; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 10103; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10104; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10105; AVX512VPOPCNTDQVL-NEXT: retq 10106; 10107; BITALG_NOVLX-LABEL: ult_13_v4i32: 10108; BITALG_NOVLX: # %bb.0: 10109; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10110; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 10111; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 10112; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10113; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10114; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10115; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10116; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10117; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] 10118; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10119; BITALG_NOVLX-NEXT: vzeroupper 10120; BITALG_NOVLX-NEXT: retq 10121; 10122; BITALG-LABEL: ult_13_v4i32: 10123; BITALG: # %bb.0: 10124; BITALG-NEXT: vpopcntb %xmm0, %xmm0 10125; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 10126; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10127; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10128; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10129; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10130; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10131; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 10132; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10133; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10134; BITALG-NEXT: retq 10135 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 10136 %3 = icmp ult <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13> 10137 %4 = sext <4 x i1> %3 to <4 x i32> 10138 ret <4 x i32> %4 10139} 10140 10141define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { 10142; SSE2-LABEL: ugt_13_v4i32: 10143; SSE2: # %bb.0: 10144; SSE2-NEXT: movdqa %xmm0, %xmm1 10145; SSE2-NEXT: psrlw $1, %xmm1 10146; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10147; SSE2-NEXT: psubb %xmm1, %xmm0 10148; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10149; SSE2-NEXT: movdqa %xmm0, %xmm2 10150; SSE2-NEXT: pand %xmm1, %xmm2 10151; SSE2-NEXT: psrlw $2, %xmm0 10152; SSE2-NEXT: pand %xmm1, %xmm0 10153; SSE2-NEXT: paddb %xmm2, %xmm0 10154; SSE2-NEXT: movdqa 
%xmm0, %xmm1 10155; SSE2-NEXT: psrlw $4, %xmm1 10156; SSE2-NEXT: paddb %xmm0, %xmm1 10157; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10158; SSE2-NEXT: pxor %xmm0, %xmm0 10159; SSE2-NEXT: movdqa %xmm1, %xmm2 10160; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10161; SSE2-NEXT: psadbw %xmm0, %xmm2 10162; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10163; SSE2-NEXT: psadbw %xmm0, %xmm1 10164; SSE2-NEXT: packuswb %xmm2, %xmm1 10165; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10166; SSE2-NEXT: movdqa %xmm1, %xmm0 10167; SSE2-NEXT: retq 10168; 10169; SSE3-LABEL: ugt_13_v4i32: 10170; SSE3: # %bb.0: 10171; SSE3-NEXT: movdqa %xmm0, %xmm1 10172; SSE3-NEXT: psrlw $1, %xmm1 10173; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10174; SSE3-NEXT: psubb %xmm1, %xmm0 10175; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10176; SSE3-NEXT: movdqa %xmm0, %xmm2 10177; SSE3-NEXT: pand %xmm1, %xmm2 10178; SSE3-NEXT: psrlw $2, %xmm0 10179; SSE3-NEXT: pand %xmm1, %xmm0 10180; SSE3-NEXT: paddb %xmm2, %xmm0 10181; SSE3-NEXT: movdqa %xmm0, %xmm1 10182; SSE3-NEXT: psrlw $4, %xmm1 10183; SSE3-NEXT: paddb %xmm0, %xmm1 10184; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10185; SSE3-NEXT: pxor %xmm0, %xmm0 10186; SSE3-NEXT: movdqa %xmm1, %xmm2 10187; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10188; SSE3-NEXT: psadbw %xmm0, %xmm2 10189; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10190; SSE3-NEXT: psadbw %xmm0, %xmm1 10191; SSE3-NEXT: packuswb %xmm2, %xmm1 10192; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10193; SSE3-NEXT: movdqa %xmm1, %xmm0 10194; SSE3-NEXT: retq 10195; 10196; SSSE3-LABEL: ugt_13_v4i32: 10197; SSSE3: # %bb.0: 10198; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10199; SSSE3-NEXT: movdqa %xmm0, %xmm3 10200; SSSE3-NEXT: pand %xmm2, %xmm3 10201; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10202; SSSE3-NEXT: movdqa 
%xmm1, %xmm4 10203; SSSE3-NEXT: pshufb %xmm3, %xmm4 10204; SSSE3-NEXT: psrlw $4, %xmm0 10205; SSSE3-NEXT: pand %xmm2, %xmm0 10206; SSSE3-NEXT: pshufb %xmm0, %xmm1 10207; SSSE3-NEXT: paddb %xmm4, %xmm1 10208; SSSE3-NEXT: pxor %xmm0, %xmm0 10209; SSSE3-NEXT: movdqa %xmm1, %xmm2 10210; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10211; SSSE3-NEXT: psadbw %xmm0, %xmm2 10212; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10213; SSSE3-NEXT: psadbw %xmm0, %xmm1 10214; SSSE3-NEXT: packuswb %xmm2, %xmm1 10215; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10216; SSSE3-NEXT: movdqa %xmm1, %xmm0 10217; SSSE3-NEXT: retq 10218; 10219; SSE41-LABEL: ugt_13_v4i32: 10220; SSE41: # %bb.0: 10221; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10222; SSE41-NEXT: movdqa %xmm0, %xmm2 10223; SSE41-NEXT: pand %xmm1, %xmm2 10224; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10225; SSE41-NEXT: movdqa %xmm3, %xmm4 10226; SSE41-NEXT: pshufb %xmm2, %xmm4 10227; SSE41-NEXT: psrlw $4, %xmm0 10228; SSE41-NEXT: pand %xmm1, %xmm0 10229; SSE41-NEXT: pshufb %xmm0, %xmm3 10230; SSE41-NEXT: paddb %xmm4, %xmm3 10231; SSE41-NEXT: pxor %xmm1, %xmm1 10232; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 10233; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 10234; SSE41-NEXT: psadbw %xmm1, %xmm3 10235; SSE41-NEXT: psadbw %xmm1, %xmm0 10236; SSE41-NEXT: packuswb %xmm3, %xmm0 10237; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 10238; SSE41-NEXT: retq 10239; 10240; AVX1-LABEL: ugt_13_v4i32: 10241; AVX1: # %bb.0: 10242; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10243; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 10244; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10245; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10246; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 10247; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 10248; AVX1-NEXT: 
vpshufb %xmm0, %xmm3, %xmm0 10249; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10250; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 10251; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10252; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10253; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10254; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10255; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10256; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 10257; AVX1-NEXT: retq 10258; 10259; AVX2-LABEL: ugt_13_v4i32: 10260; AVX2: # %bb.0: 10261; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10262; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 10263; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10264; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10265; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 10266; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 10267; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10268; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10269; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 10270; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10271; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10272; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10273; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10274; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10275; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] 10276; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 10277; AVX2-NEXT: retq 10278; 10279; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i32: 10280; AVX512VPOPCNTDQ: # %bb.0: 10281; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10282; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 10283; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] 10284; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 10285; AVX512VPOPCNTDQ-NEXT: vzeroupper 10286; AVX512VPOPCNTDQ-NEXT: retq 10287; 10288; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i32: 10289; AVX512VPOPCNTDQVL: # %bb.0: 10290; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 10291; 
AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 10292; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10293; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10294; AVX512VPOPCNTDQVL-NEXT: retq 10295; 10296; BITALG_NOVLX-LABEL: ugt_13_v4i32: 10297; BITALG_NOVLX: # %bb.0: 10298; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10299; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 10300; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 10301; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10302; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10303; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10304; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10305; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10306; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] 10307; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 10308; BITALG_NOVLX-NEXT: vzeroupper 10309; BITALG_NOVLX-NEXT: retq 10310; 10311; BITALG-LABEL: ugt_13_v4i32: 10312; BITALG: # %bb.0: 10313; BITALG-NEXT: vpopcntb %xmm0, %xmm0 10314; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 10315; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10316; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10317; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10318; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10319; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10320; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 10321; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10322; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10323; BITALG-NEXT: retq 10324 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 10325 %3 = icmp ugt <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13> 10326 %4 = sext <4 x i1> %3 to <4 x i32> 10327 ret <4 x i32> %4 10328} 10329 10330define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { 10331; SSE2-LABEL: ult_14_v4i32: 10332; SSE2: # %bb.0: 10333; SSE2-NEXT: movdqa %xmm0, %xmm1 10334; SSE2-NEXT: 
psrlw $1, %xmm1 10335; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10336; SSE2-NEXT: psubb %xmm1, %xmm0 10337; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10338; SSE2-NEXT: movdqa %xmm0, %xmm2 10339; SSE2-NEXT: pand %xmm1, %xmm2 10340; SSE2-NEXT: psrlw $2, %xmm0 10341; SSE2-NEXT: pand %xmm1, %xmm0 10342; SSE2-NEXT: paddb %xmm2, %xmm0 10343; SSE2-NEXT: movdqa %xmm0, %xmm1 10344; SSE2-NEXT: psrlw $4, %xmm1 10345; SSE2-NEXT: paddb %xmm0, %xmm1 10346; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10347; SSE2-NEXT: pxor %xmm0, %xmm0 10348; SSE2-NEXT: movdqa %xmm1, %xmm2 10349; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10350; SSE2-NEXT: psadbw %xmm0, %xmm2 10351; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10352; SSE2-NEXT: psadbw %xmm0, %xmm1 10353; SSE2-NEXT: packuswb %xmm2, %xmm1 10354; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] 10355; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 10356; SSE2-NEXT: retq 10357; 10358; SSE3-LABEL: ult_14_v4i32: 10359; SSE3: # %bb.0: 10360; SSE3-NEXT: movdqa %xmm0, %xmm1 10361; SSE3-NEXT: psrlw $1, %xmm1 10362; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10363; SSE3-NEXT: psubb %xmm1, %xmm0 10364; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10365; SSE3-NEXT: movdqa %xmm0, %xmm2 10366; SSE3-NEXT: pand %xmm1, %xmm2 10367; SSE3-NEXT: psrlw $2, %xmm0 10368; SSE3-NEXT: pand %xmm1, %xmm0 10369; SSE3-NEXT: paddb %xmm2, %xmm0 10370; SSE3-NEXT: movdqa %xmm0, %xmm1 10371; SSE3-NEXT: psrlw $4, %xmm1 10372; SSE3-NEXT: paddb %xmm0, %xmm1 10373; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10374; SSE3-NEXT: pxor %xmm0, %xmm0 10375; SSE3-NEXT: movdqa %xmm1, %xmm2 10376; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10377; SSE3-NEXT: psadbw %xmm0, %xmm2 10378; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10379; SSE3-NEXT: psadbw %xmm0, %xmm1 10380; SSE3-NEXT: packuswb %xmm2, %xmm1 10381; SSE3-NEXT: movdqa {{.*#+}} xmm0 = 
[14,14,14,14] 10382; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 10383; SSE3-NEXT: retq 10384; 10385; SSSE3-LABEL: ult_14_v4i32: 10386; SSSE3: # %bb.0: 10387; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10388; SSSE3-NEXT: movdqa %xmm0, %xmm2 10389; SSSE3-NEXT: pand %xmm1, %xmm2 10390; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10391; SSSE3-NEXT: movdqa %xmm3, %xmm4 10392; SSSE3-NEXT: pshufb %xmm2, %xmm4 10393; SSSE3-NEXT: psrlw $4, %xmm0 10394; SSSE3-NEXT: pand %xmm1, %xmm0 10395; SSSE3-NEXT: pshufb %xmm0, %xmm3 10396; SSSE3-NEXT: paddb %xmm4, %xmm3 10397; SSSE3-NEXT: pxor %xmm0, %xmm0 10398; SSSE3-NEXT: movdqa %xmm3, %xmm1 10399; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 10400; SSSE3-NEXT: psadbw %xmm0, %xmm1 10401; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 10402; SSSE3-NEXT: psadbw %xmm0, %xmm3 10403; SSSE3-NEXT: packuswb %xmm1, %xmm3 10404; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] 10405; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 10406; SSSE3-NEXT: retq 10407; 10408; SSE41-LABEL: ult_14_v4i32: 10409; SSE41: # %bb.0: 10410; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10411; SSE41-NEXT: movdqa %xmm0, %xmm2 10412; SSE41-NEXT: pand %xmm1, %xmm2 10413; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10414; SSE41-NEXT: movdqa %xmm3, %xmm4 10415; SSE41-NEXT: pshufb %xmm2, %xmm4 10416; SSE41-NEXT: psrlw $4, %xmm0 10417; SSE41-NEXT: pand %xmm1, %xmm0 10418; SSE41-NEXT: pshufb %xmm0, %xmm3 10419; SSE41-NEXT: paddb %xmm4, %xmm3 10420; SSE41-NEXT: pxor %xmm0, %xmm0 10421; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 10422; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 10423; SSE41-NEXT: psadbw %xmm0, %xmm3 10424; SSE41-NEXT: psadbw %xmm0, %xmm1 10425; SSE41-NEXT: packuswb %xmm3, %xmm1 10426; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] 10427; SSE41-NEXT: 
pcmpgtd %xmm1, %xmm0 10428; SSE41-NEXT: retq 10429; 10430; AVX1-LABEL: ult_14_v4i32: 10431; AVX1: # %bb.0: 10432; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10433; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 10434; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10435; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10436; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 10437; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 10438; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10439; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10440; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 10441; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10442; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10443; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10444; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10445; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10446; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14] 10447; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10448; AVX1-NEXT: retq 10449; 10450; AVX2-LABEL: ult_14_v4i32: 10451; AVX2: # %bb.0: 10452; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10453; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 10454; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10455; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10456; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 10457; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 10458; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10459; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10460; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 10461; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10462; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10463; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10464; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10465; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10466; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] 10467; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10468; AVX2-NEXT: retq 10469; 10470; AVX512VPOPCNTDQ-LABEL: 
ult_14_v4i32: 10471; AVX512VPOPCNTDQ: # %bb.0: 10472; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10473; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 10474; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] 10475; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10476; AVX512VPOPCNTDQ-NEXT: vzeroupper 10477; AVX512VPOPCNTDQ-NEXT: retq 10478; 10479; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i32: 10480; AVX512VPOPCNTDQVL: # %bb.0: 10481; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 10482; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 10483; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10484; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10485; AVX512VPOPCNTDQVL-NEXT: retq 10486; 10487; BITALG_NOVLX-LABEL: ult_14_v4i32: 10488; BITALG_NOVLX: # %bb.0: 10489; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10490; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 10491; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 10492; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10493; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10494; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10495; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10496; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10497; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] 10498; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10499; BITALG_NOVLX-NEXT: vzeroupper 10500; BITALG_NOVLX-NEXT: retq 10501; 10502; BITALG-LABEL: ult_14_v4i32: 10503; BITALG: # %bb.0: 10504; BITALG-NEXT: vpopcntb %xmm0, %xmm0 10505; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 10506; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10507; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10508; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10509; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10510; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10511; BITALG-NEXT: vpcmpltud 
{{.*}}(%rip){1to4}, %xmm0, %k1 10512; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10513; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10514; BITALG-NEXT: retq 10515 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 10516 %3 = icmp ult <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14> 10517 %4 = sext <4 x i1> %3 to <4 x i32> 10518 ret <4 x i32> %4 10519} 10520 10521define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { 10522; SSE2-LABEL: ugt_14_v4i32: 10523; SSE2: # %bb.0: 10524; SSE2-NEXT: movdqa %xmm0, %xmm1 10525; SSE2-NEXT: psrlw $1, %xmm1 10526; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10527; SSE2-NEXT: psubb %xmm1, %xmm0 10528; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10529; SSE2-NEXT: movdqa %xmm0, %xmm2 10530; SSE2-NEXT: pand %xmm1, %xmm2 10531; SSE2-NEXT: psrlw $2, %xmm0 10532; SSE2-NEXT: pand %xmm1, %xmm0 10533; SSE2-NEXT: paddb %xmm2, %xmm0 10534; SSE2-NEXT: movdqa %xmm0, %xmm1 10535; SSE2-NEXT: psrlw $4, %xmm1 10536; SSE2-NEXT: paddb %xmm0, %xmm1 10537; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10538; SSE2-NEXT: pxor %xmm0, %xmm0 10539; SSE2-NEXT: movdqa %xmm1, %xmm2 10540; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10541; SSE2-NEXT: psadbw %xmm0, %xmm2 10542; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10543; SSE2-NEXT: psadbw %xmm0, %xmm1 10544; SSE2-NEXT: packuswb %xmm2, %xmm1 10545; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10546; SSE2-NEXT: movdqa %xmm1, %xmm0 10547; SSE2-NEXT: retq 10548; 10549; SSE3-LABEL: ugt_14_v4i32: 10550; SSE3: # %bb.0: 10551; SSE3-NEXT: movdqa %xmm0, %xmm1 10552; SSE3-NEXT: psrlw $1, %xmm1 10553; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10554; SSE3-NEXT: psubb %xmm1, %xmm0 10555; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10556; SSE3-NEXT: movdqa %xmm0, %xmm2 10557; SSE3-NEXT: pand %xmm1, %xmm2 10558; SSE3-NEXT: psrlw $2, %xmm0 10559; SSE3-NEXT: pand %xmm1, %xmm0 10560; SSE3-NEXT: paddb %xmm2, %xmm0 10561; 
SSE3-NEXT: movdqa %xmm0, %xmm1 10562; SSE3-NEXT: psrlw $4, %xmm1 10563; SSE3-NEXT: paddb %xmm0, %xmm1 10564; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10565; SSE3-NEXT: pxor %xmm0, %xmm0 10566; SSE3-NEXT: movdqa %xmm1, %xmm2 10567; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10568; SSE3-NEXT: psadbw %xmm0, %xmm2 10569; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10570; SSE3-NEXT: psadbw %xmm0, %xmm1 10571; SSE3-NEXT: packuswb %xmm2, %xmm1 10572; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10573; SSE3-NEXT: movdqa %xmm1, %xmm0 10574; SSE3-NEXT: retq 10575; 10576; SSSE3-LABEL: ugt_14_v4i32: 10577; SSSE3: # %bb.0: 10578; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10579; SSSE3-NEXT: movdqa %xmm0, %xmm3 10580; SSSE3-NEXT: pand %xmm2, %xmm3 10581; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10582; SSSE3-NEXT: movdqa %xmm1, %xmm4 10583; SSSE3-NEXT: pshufb %xmm3, %xmm4 10584; SSSE3-NEXT: psrlw $4, %xmm0 10585; SSSE3-NEXT: pand %xmm2, %xmm0 10586; SSSE3-NEXT: pshufb %xmm0, %xmm1 10587; SSSE3-NEXT: paddb %xmm4, %xmm1 10588; SSSE3-NEXT: pxor %xmm0, %xmm0 10589; SSSE3-NEXT: movdqa %xmm1, %xmm2 10590; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10591; SSSE3-NEXT: psadbw %xmm0, %xmm2 10592; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10593; SSSE3-NEXT: psadbw %xmm0, %xmm1 10594; SSSE3-NEXT: packuswb %xmm2, %xmm1 10595; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10596; SSSE3-NEXT: movdqa %xmm1, %xmm0 10597; SSSE3-NEXT: retq 10598; 10599; SSE41-LABEL: ugt_14_v4i32: 10600; SSE41: # %bb.0: 10601; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10602; SSE41-NEXT: movdqa %xmm0, %xmm2 10603; SSE41-NEXT: pand %xmm1, %xmm2 10604; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10605; SSE41-NEXT: movdqa %xmm3, %xmm4 10606; SSE41-NEXT: pshufb %xmm2, %xmm4 10607; SSE41-NEXT: psrlw 
$4, %xmm0 10608; SSE41-NEXT: pand %xmm1, %xmm0 10609; SSE41-NEXT: pshufb %xmm0, %xmm3 10610; SSE41-NEXT: paddb %xmm4, %xmm3 10611; SSE41-NEXT: pxor %xmm1, %xmm1 10612; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 10613; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 10614; SSE41-NEXT: psadbw %xmm1, %xmm3 10615; SSE41-NEXT: psadbw %xmm1, %xmm0 10616; SSE41-NEXT: packuswb %xmm3, %xmm0 10617; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 10618; SSE41-NEXT: retq 10619; 10620; AVX1-LABEL: ugt_14_v4i32: 10621; AVX1: # %bb.0: 10622; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10623; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 10624; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10625; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10626; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 10627; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 10628; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10629; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10630; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 10631; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10632; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10633; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10634; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10635; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10636; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 10637; AVX1-NEXT: retq 10638; 10639; AVX2-LABEL: ugt_14_v4i32: 10640; AVX2: # %bb.0: 10641; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10642; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 10643; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10644; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10645; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 10646; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 10647; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10648; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10649; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 10650; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10651; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10652; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10653; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10654; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10655; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] 10656; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 10657; AVX2-NEXT: retq 10658; 10659; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i32: 10660; AVX512VPOPCNTDQ: # %bb.0: 10661; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10662; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 10663; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] 10664; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 10665; AVX512VPOPCNTDQ-NEXT: vzeroupper 10666; AVX512VPOPCNTDQ-NEXT: retq 10667; 10668; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i32: 10669; AVX512VPOPCNTDQVL: # %bb.0: 10670; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 10671; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 10672; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10673; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10674; AVX512VPOPCNTDQVL-NEXT: retq 10675; 10676; BITALG_NOVLX-LABEL: ugt_14_v4i32: 10677; BITALG_NOVLX: # %bb.0: 10678; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10679; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 10680; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 10681; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10682; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10683; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10684; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10685; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10686; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] 10687; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 10688; BITALG_NOVLX-NEXT: vzeroupper 10689; BITALG_NOVLX-NEXT: retq 10690; 10691; BITALG-LABEL: ugt_14_v4i32: 10692; BITALG: # %bb.0: 10693; 
BITALG-NEXT: vpopcntb %xmm0, %xmm0 10694; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 10695; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10696; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10697; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10698; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10699; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10700; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 10701; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10702; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10703; BITALG-NEXT: retq 10704 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 10705 %3 = icmp ugt <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14> 10706 %4 = sext <4 x i1> %3 to <4 x i32> 10707 ret <4 x i32> %4 10708} 10709 10710define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { 10711; SSE2-LABEL: ult_15_v4i32: 10712; SSE2: # %bb.0: 10713; SSE2-NEXT: movdqa %xmm0, %xmm1 10714; SSE2-NEXT: psrlw $1, %xmm1 10715; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10716; SSE2-NEXT: psubb %xmm1, %xmm0 10717; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10718; SSE2-NEXT: movdqa %xmm0, %xmm2 10719; SSE2-NEXT: pand %xmm1, %xmm2 10720; SSE2-NEXT: psrlw $2, %xmm0 10721; SSE2-NEXT: pand %xmm1, %xmm0 10722; SSE2-NEXT: paddb %xmm2, %xmm0 10723; SSE2-NEXT: movdqa %xmm0, %xmm1 10724; SSE2-NEXT: psrlw $4, %xmm1 10725; SSE2-NEXT: paddb %xmm0, %xmm1 10726; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10727; SSE2-NEXT: pxor %xmm0, %xmm0 10728; SSE2-NEXT: movdqa %xmm1, %xmm2 10729; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10730; SSE2-NEXT: psadbw %xmm0, %xmm2 10731; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10732; SSE2-NEXT: psadbw %xmm0, %xmm1 10733; SSE2-NEXT: packuswb %xmm2, %xmm1 10734; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] 10735; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 10736; SSE2-NEXT: retq 10737; 10738; SSE3-LABEL: ult_15_v4i32: 10739; SSE3: # %bb.0: 10740; SSE3-NEXT: 
movdqa %xmm0, %xmm1 10741; SSE3-NEXT: psrlw $1, %xmm1 10742; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10743; SSE3-NEXT: psubb %xmm1, %xmm0 10744; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10745; SSE3-NEXT: movdqa %xmm0, %xmm2 10746; SSE3-NEXT: pand %xmm1, %xmm2 10747; SSE3-NEXT: psrlw $2, %xmm0 10748; SSE3-NEXT: pand %xmm1, %xmm0 10749; SSE3-NEXT: paddb %xmm2, %xmm0 10750; SSE3-NEXT: movdqa %xmm0, %xmm1 10751; SSE3-NEXT: psrlw $4, %xmm1 10752; SSE3-NEXT: paddb %xmm0, %xmm1 10753; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10754; SSE3-NEXT: pxor %xmm0, %xmm0 10755; SSE3-NEXT: movdqa %xmm1, %xmm2 10756; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10757; SSE3-NEXT: psadbw %xmm0, %xmm2 10758; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10759; SSE3-NEXT: psadbw %xmm0, %xmm1 10760; SSE3-NEXT: packuswb %xmm2, %xmm1 10761; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] 10762; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 10763; SSE3-NEXT: retq 10764; 10765; SSSE3-LABEL: ult_15_v4i32: 10766; SSSE3: # %bb.0: 10767; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10768; SSSE3-NEXT: movdqa %xmm0, %xmm2 10769; SSSE3-NEXT: pand %xmm1, %xmm2 10770; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10771; SSSE3-NEXT: movdqa %xmm3, %xmm4 10772; SSSE3-NEXT: pshufb %xmm2, %xmm4 10773; SSSE3-NEXT: psrlw $4, %xmm0 10774; SSSE3-NEXT: pand %xmm1, %xmm0 10775; SSSE3-NEXT: pshufb %xmm0, %xmm3 10776; SSSE3-NEXT: paddb %xmm4, %xmm3 10777; SSSE3-NEXT: pxor %xmm0, %xmm0 10778; SSSE3-NEXT: movdqa %xmm3, %xmm1 10779; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 10780; SSSE3-NEXT: psadbw %xmm0, %xmm1 10781; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 10782; SSSE3-NEXT: psadbw %xmm0, %xmm3 10783; SSSE3-NEXT: packuswb %xmm1, %xmm3 10784; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] 10785; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 
10786; SSSE3-NEXT: retq 10787; 10788; SSE41-LABEL: ult_15_v4i32: 10789; SSE41: # %bb.0: 10790; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10791; SSE41-NEXT: movdqa %xmm0, %xmm2 10792; SSE41-NEXT: pand %xmm1, %xmm2 10793; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10794; SSE41-NEXT: movdqa %xmm3, %xmm4 10795; SSE41-NEXT: pshufb %xmm2, %xmm4 10796; SSE41-NEXT: psrlw $4, %xmm0 10797; SSE41-NEXT: pand %xmm1, %xmm0 10798; SSE41-NEXT: pshufb %xmm0, %xmm3 10799; SSE41-NEXT: paddb %xmm4, %xmm3 10800; SSE41-NEXT: pxor %xmm0, %xmm0 10801; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 10802; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 10803; SSE41-NEXT: psadbw %xmm0, %xmm3 10804; SSE41-NEXT: psadbw %xmm0, %xmm1 10805; SSE41-NEXT: packuswb %xmm3, %xmm1 10806; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] 10807; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 10808; SSE41-NEXT: retq 10809; 10810; AVX1-LABEL: ult_15_v4i32: 10811; AVX1: # %bb.0: 10812; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10813; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 10814; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10815; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10816; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 10817; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 10818; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10819; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10820; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 10821; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10822; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10823; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10824; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10825; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10826; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15] 10827; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10828; AVX1-NEXT: retq 10829; 10830; AVX2-LABEL: ult_15_v4i32: 10831; AVX2: # %bb.0: 
10832; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10833; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 10834; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10835; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 10836; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 10837; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 10838; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 10839; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 10840; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 10841; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10842; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10843; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10844; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10845; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10846; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] 10847; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10848; AVX2-NEXT: retq 10849; 10850; AVX512VPOPCNTDQ-LABEL: ult_15_v4i32: 10851; AVX512VPOPCNTDQ: # %bb.0: 10852; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10853; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 10854; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] 10855; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10856; AVX512VPOPCNTDQ-NEXT: vzeroupper 10857; AVX512VPOPCNTDQ-NEXT: retq 10858; 10859; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i32: 10860; AVX512VPOPCNTDQVL: # %bb.0: 10861; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 10862; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 10863; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10864; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10865; AVX512VPOPCNTDQVL-NEXT: retq 10866; 10867; BITALG_NOVLX-LABEL: ult_15_v4i32: 10868; BITALG_NOVLX: # %bb.0: 10869; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10870; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 10871; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 10872; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10873; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10874; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10875; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10876; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10877; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] 10878; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 10879; BITALG_NOVLX-NEXT: vzeroupper 10880; BITALG_NOVLX-NEXT: retq 10881; 10882; BITALG-LABEL: ult_15_v4i32: 10883; BITALG: # %bb.0: 10884; BITALG-NEXT: vpopcntb %xmm0, %xmm0 10885; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 10886; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 10887; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 10888; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 10889; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 10890; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 10891; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 10892; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 10893; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 10894; BITALG-NEXT: retq 10895 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 10896 %3 = icmp ult <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15> 10897 %4 = sext <4 x i1> %3 to <4 x i32> 10898 ret <4 x i32> %4 10899} 10900 10901define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { 10902; SSE2-LABEL: ugt_15_v4i32: 10903; SSE2: # %bb.0: 10904; SSE2-NEXT: movdqa %xmm0, %xmm1 10905; SSE2-NEXT: psrlw $1, %xmm1 10906; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 10907; SSE2-NEXT: psubb %xmm1, %xmm0 10908; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10909; SSE2-NEXT: movdqa %xmm0, %xmm2 10910; SSE2-NEXT: pand %xmm1, %xmm2 10911; SSE2-NEXT: psrlw $2, %xmm0 10912; SSE2-NEXT: pand %xmm1, %xmm0 10913; SSE2-NEXT: paddb %xmm2, %xmm0 10914; SSE2-NEXT: movdqa %xmm0, %xmm1 10915; SSE2-NEXT: psrlw $4, %xmm1 10916; SSE2-NEXT: paddb %xmm0, %xmm1 10917; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 
10918; SSE2-NEXT: pxor %xmm0, %xmm0 10919; SSE2-NEXT: movdqa %xmm1, %xmm2 10920; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10921; SSE2-NEXT: psadbw %xmm0, %xmm2 10922; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10923; SSE2-NEXT: psadbw %xmm0, %xmm1 10924; SSE2-NEXT: packuswb %xmm2, %xmm1 10925; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10926; SSE2-NEXT: movdqa %xmm1, %xmm0 10927; SSE2-NEXT: retq 10928; 10929; SSE3-LABEL: ugt_15_v4i32: 10930; SSE3: # %bb.0: 10931; SSE3-NEXT: movdqa %xmm0, %xmm1 10932; SSE3-NEXT: psrlw $1, %xmm1 10933; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10934; SSE3-NEXT: psubb %xmm1, %xmm0 10935; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 10936; SSE3-NEXT: movdqa %xmm0, %xmm2 10937; SSE3-NEXT: pand %xmm1, %xmm2 10938; SSE3-NEXT: psrlw $2, %xmm0 10939; SSE3-NEXT: pand %xmm1, %xmm0 10940; SSE3-NEXT: paddb %xmm2, %xmm0 10941; SSE3-NEXT: movdqa %xmm0, %xmm1 10942; SSE3-NEXT: psrlw $4, %xmm1 10943; SSE3-NEXT: paddb %xmm0, %xmm1 10944; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 10945; SSE3-NEXT: pxor %xmm0, %xmm0 10946; SSE3-NEXT: movdqa %xmm1, %xmm2 10947; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10948; SSE3-NEXT: psadbw %xmm0, %xmm2 10949; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10950; SSE3-NEXT: psadbw %xmm0, %xmm1 10951; SSE3-NEXT: packuswb %xmm2, %xmm1 10952; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10953; SSE3-NEXT: movdqa %xmm1, %xmm0 10954; SSE3-NEXT: retq 10955; 10956; SSSE3-LABEL: ugt_15_v4i32: 10957; SSSE3: # %bb.0: 10958; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10959; SSSE3-NEXT: movdqa %xmm0, %xmm3 10960; SSSE3-NEXT: pand %xmm2, %xmm3 10961; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10962; SSSE3-NEXT: movdqa %xmm1, %xmm4 10963; SSSE3-NEXT: pshufb %xmm3, %xmm4 10964; SSSE3-NEXT: psrlw $4, %xmm0 10965; SSSE3-NEXT: pand %xmm2, %xmm0 
10966; SSSE3-NEXT: pshufb %xmm0, %xmm1 10967; SSSE3-NEXT: paddb %xmm4, %xmm1 10968; SSSE3-NEXT: pxor %xmm0, %xmm0 10969; SSSE3-NEXT: movdqa %xmm1, %xmm2 10970; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 10971; SSSE3-NEXT: psadbw %xmm0, %xmm2 10972; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 10973; SSSE3-NEXT: psadbw %xmm0, %xmm1 10974; SSSE3-NEXT: packuswb %xmm2, %xmm1 10975; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 10976; SSSE3-NEXT: movdqa %xmm1, %xmm0 10977; SSSE3-NEXT: retq 10978; 10979; SSE41-LABEL: ugt_15_v4i32: 10980; SSE41: # %bb.0: 10981; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 10982; SSE41-NEXT: movdqa %xmm0, %xmm2 10983; SSE41-NEXT: pand %xmm1, %xmm2 10984; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 10985; SSE41-NEXT: movdqa %xmm3, %xmm4 10986; SSE41-NEXT: pshufb %xmm2, %xmm4 10987; SSE41-NEXT: psrlw $4, %xmm0 10988; SSE41-NEXT: pand %xmm1, %xmm0 10989; SSE41-NEXT: pshufb %xmm0, %xmm3 10990; SSE41-NEXT: paddb %xmm4, %xmm3 10991; SSE41-NEXT: pxor %xmm1, %xmm1 10992; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 10993; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 10994; SSE41-NEXT: psadbw %xmm1, %xmm3 10995; SSE41-NEXT: psadbw %xmm1, %xmm0 10996; SSE41-NEXT: packuswb %xmm3, %xmm0 10997; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 10998; SSE41-NEXT: retq 10999; 11000; AVX1-LABEL: ugt_15_v4i32: 11001; AVX1: # %bb.0: 11002; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11003; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 11004; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11005; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11006; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 11007; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 11008; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11009; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11010; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 11011; 
AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11012; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11013; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11014; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11015; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11016; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 11017; AVX1-NEXT: retq 11018; 11019; AVX2-LABEL: ugt_15_v4i32: 11020; AVX2: # %bb.0: 11021; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11022; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 11023; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11024; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11025; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 11026; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 11027; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11028; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11029; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 11030; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11031; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11032; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11033; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11034; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11035; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] 11036; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11037; AVX2-NEXT: retq 11038; 11039; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i32: 11040; AVX512VPOPCNTDQ: # %bb.0: 11041; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11042; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 11043; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] 11044; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11045; AVX512VPOPCNTDQ-NEXT: vzeroupper 11046; AVX512VPOPCNTDQ-NEXT: retq 11047; 11048; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i32: 11049; AVX512VPOPCNTDQVL: # %bb.0: 11050; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 11051; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 11052; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 
11053; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11054; AVX512VPOPCNTDQVL-NEXT: retq 11055; 11056; BITALG_NOVLX-LABEL: ugt_15_v4i32: 11057; BITALG_NOVLX: # %bb.0: 11058; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11059; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 11060; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 11061; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11062; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11063; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11064; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11065; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11066; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] 11067; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11068; BITALG_NOVLX-NEXT: vzeroupper 11069; BITALG_NOVLX-NEXT: retq 11070; 11071; BITALG-LABEL: ugt_15_v4i32: 11072; BITALG: # %bb.0: 11073; BITALG-NEXT: vpopcntb %xmm0, %xmm0 11074; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 11075; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11076; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11077; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11078; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11079; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11080; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 11081; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11082; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11083; BITALG-NEXT: retq 11084 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 11085 %3 = icmp ugt <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15> 11086 %4 = sext <4 x i1> %3 to <4 x i32> 11087 ret <4 x i32> %4 11088} 11089 11090define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { 11091; SSE2-LABEL: ult_16_v4i32: 11092; SSE2: # %bb.0: 11093; SSE2-NEXT: movdqa %xmm0, %xmm1 11094; SSE2-NEXT: psrlw $1, %xmm1 11095; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11096; SSE2-NEXT: psubb %xmm1, %xmm0 11097; SSE2-NEXT: movdqa 
{{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11098; SSE2-NEXT: movdqa %xmm0, %xmm2 11099; SSE2-NEXT: pand %xmm1, %xmm2 11100; SSE2-NEXT: psrlw $2, %xmm0 11101; SSE2-NEXT: pand %xmm1, %xmm0 11102; SSE2-NEXT: paddb %xmm2, %xmm0 11103; SSE2-NEXT: movdqa %xmm0, %xmm1 11104; SSE2-NEXT: psrlw $4, %xmm1 11105; SSE2-NEXT: paddb %xmm0, %xmm1 11106; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11107; SSE2-NEXT: pxor %xmm0, %xmm0 11108; SSE2-NEXT: movdqa %xmm1, %xmm2 11109; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11110; SSE2-NEXT: psadbw %xmm0, %xmm2 11111; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11112; SSE2-NEXT: psadbw %xmm0, %xmm1 11113; SSE2-NEXT: packuswb %xmm2, %xmm1 11114; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] 11115; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 11116; SSE2-NEXT: retq 11117; 11118; SSE3-LABEL: ult_16_v4i32: 11119; SSE3: # %bb.0: 11120; SSE3-NEXT: movdqa %xmm0, %xmm1 11121; SSE3-NEXT: psrlw $1, %xmm1 11122; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11123; SSE3-NEXT: psubb %xmm1, %xmm0 11124; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11125; SSE3-NEXT: movdqa %xmm0, %xmm2 11126; SSE3-NEXT: pand %xmm1, %xmm2 11127; SSE3-NEXT: psrlw $2, %xmm0 11128; SSE3-NEXT: pand %xmm1, %xmm0 11129; SSE3-NEXT: paddb %xmm2, %xmm0 11130; SSE3-NEXT: movdqa %xmm0, %xmm1 11131; SSE3-NEXT: psrlw $4, %xmm1 11132; SSE3-NEXT: paddb %xmm0, %xmm1 11133; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11134; SSE3-NEXT: pxor %xmm0, %xmm0 11135; SSE3-NEXT: movdqa %xmm1, %xmm2 11136; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11137; SSE3-NEXT: psadbw %xmm0, %xmm2 11138; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11139; SSE3-NEXT: psadbw %xmm0, %xmm1 11140; SSE3-NEXT: packuswb %xmm2, %xmm1 11141; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] 11142; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 11143; SSE3-NEXT: retq 11144; 11145; SSSE3-LABEL: ult_16_v4i32: 
11146; SSSE3: # %bb.0: 11147; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11148; SSSE3-NEXT: movdqa %xmm0, %xmm2 11149; SSSE3-NEXT: pand %xmm1, %xmm2 11150; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11151; SSSE3-NEXT: movdqa %xmm3, %xmm4 11152; SSSE3-NEXT: pshufb %xmm2, %xmm4 11153; SSSE3-NEXT: psrlw $4, %xmm0 11154; SSSE3-NEXT: pand %xmm1, %xmm0 11155; SSSE3-NEXT: pshufb %xmm0, %xmm3 11156; SSSE3-NEXT: paddb %xmm4, %xmm3 11157; SSSE3-NEXT: pxor %xmm0, %xmm0 11158; SSSE3-NEXT: movdqa %xmm3, %xmm1 11159; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 11160; SSSE3-NEXT: psadbw %xmm0, %xmm1 11161; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 11162; SSSE3-NEXT: psadbw %xmm0, %xmm3 11163; SSSE3-NEXT: packuswb %xmm1, %xmm3 11164; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] 11165; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 11166; SSSE3-NEXT: retq 11167; 11168; SSE41-LABEL: ult_16_v4i32: 11169; SSE41: # %bb.0: 11170; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11171; SSE41-NEXT: movdqa %xmm0, %xmm2 11172; SSE41-NEXT: pand %xmm1, %xmm2 11173; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11174; SSE41-NEXT: movdqa %xmm3, %xmm4 11175; SSE41-NEXT: pshufb %xmm2, %xmm4 11176; SSE41-NEXT: psrlw $4, %xmm0 11177; SSE41-NEXT: pand %xmm1, %xmm0 11178; SSE41-NEXT: pshufb %xmm0, %xmm3 11179; SSE41-NEXT: paddb %xmm4, %xmm3 11180; SSE41-NEXT: pxor %xmm0, %xmm0 11181; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 11182; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 11183; SSE41-NEXT: psadbw %xmm0, %xmm3 11184; SSE41-NEXT: psadbw %xmm0, %xmm1 11185; SSE41-NEXT: packuswb %xmm3, %xmm1 11186; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] 11187; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 11188; SSE41-NEXT: retq 11189; 11190; AVX1-LABEL: ult_16_v4i32: 11191; AVX1: # %bb.0: 11192; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11193; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 11194; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11195; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11196; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 11197; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 11198; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11199; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11200; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 11201; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11202; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11203; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11204; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11205; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11206; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16] 11207; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11208; AVX1-NEXT: retq 11209; 11210; AVX2-LABEL: ult_16_v4i32: 11211; AVX2: # %bb.0: 11212; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11213; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 11214; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11215; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11216; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 11217; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 11218; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11219; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11220; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 11221; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11222; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11223; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11224; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11225; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11226; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] 11227; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11228; AVX2-NEXT: retq 11229; 11230; AVX512VPOPCNTDQ-LABEL: ult_16_v4i32: 11231; AVX512VPOPCNTDQ: # %bb.0: 11232; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 
11233; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 11234; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] 11235; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11236; AVX512VPOPCNTDQ-NEXT: vzeroupper 11237; AVX512VPOPCNTDQ-NEXT: retq 11238; 11239; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i32: 11240; AVX512VPOPCNTDQVL: # %bb.0: 11241; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 11242; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 11243; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11244; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11245; AVX512VPOPCNTDQVL-NEXT: retq 11246; 11247; BITALG_NOVLX-LABEL: ult_16_v4i32: 11248; BITALG_NOVLX: # %bb.0: 11249; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11250; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 11251; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 11252; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11253; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11254; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11255; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11256; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11257; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] 11258; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11259; BITALG_NOVLX-NEXT: vzeroupper 11260; BITALG_NOVLX-NEXT: retq 11261; 11262; BITALG-LABEL: ult_16_v4i32: 11263; BITALG: # %bb.0: 11264; BITALG-NEXT: vpopcntb %xmm0, %xmm0 11265; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 11266; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11267; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11268; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11269; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11270; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11271; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 11272; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11273; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 
{%k1} {z} 11274; BITALG-NEXT: retq 11275 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 11276 %3 = icmp ult <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16> 11277 %4 = sext <4 x i1> %3 to <4 x i32> 11278 ret <4 x i32> %4 11279} 11280 11281define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { 11282; SSE2-LABEL: ugt_16_v4i32: 11283; SSE2: # %bb.0: 11284; SSE2-NEXT: movdqa %xmm0, %xmm1 11285; SSE2-NEXT: psrlw $1, %xmm1 11286; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11287; SSE2-NEXT: psubb %xmm1, %xmm0 11288; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11289; SSE2-NEXT: movdqa %xmm0, %xmm2 11290; SSE2-NEXT: pand %xmm1, %xmm2 11291; SSE2-NEXT: psrlw $2, %xmm0 11292; SSE2-NEXT: pand %xmm1, %xmm0 11293; SSE2-NEXT: paddb %xmm2, %xmm0 11294; SSE2-NEXT: movdqa %xmm0, %xmm1 11295; SSE2-NEXT: psrlw $4, %xmm1 11296; SSE2-NEXT: paddb %xmm0, %xmm1 11297; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11298; SSE2-NEXT: pxor %xmm0, %xmm0 11299; SSE2-NEXT: movdqa %xmm1, %xmm2 11300; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11301; SSE2-NEXT: psadbw %xmm0, %xmm2 11302; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11303; SSE2-NEXT: psadbw %xmm0, %xmm1 11304; SSE2-NEXT: packuswb %xmm2, %xmm1 11305; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 11306; SSE2-NEXT: movdqa %xmm1, %xmm0 11307; SSE2-NEXT: retq 11308; 11309; SSE3-LABEL: ugt_16_v4i32: 11310; SSE3: # %bb.0: 11311; SSE3-NEXT: movdqa %xmm0, %xmm1 11312; SSE3-NEXT: psrlw $1, %xmm1 11313; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11314; SSE3-NEXT: psubb %xmm1, %xmm0 11315; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11316; SSE3-NEXT: movdqa %xmm0, %xmm2 11317; SSE3-NEXT: pand %xmm1, %xmm2 11318; SSE3-NEXT: psrlw $2, %xmm0 11319; SSE3-NEXT: pand %xmm1, %xmm0 11320; SSE3-NEXT: paddb %xmm2, %xmm0 11321; SSE3-NEXT: movdqa %xmm0, %xmm1 11322; SSE3-NEXT: psrlw $4, %xmm1 11323; SSE3-NEXT: paddb %xmm0, %xmm1 11324; SSE3-NEXT: pand 
{{.*}}(%rip), %xmm1 11325; SSE3-NEXT: pxor %xmm0, %xmm0 11326; SSE3-NEXT: movdqa %xmm1, %xmm2 11327; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11328; SSE3-NEXT: psadbw %xmm0, %xmm2 11329; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11330; SSE3-NEXT: psadbw %xmm0, %xmm1 11331; SSE3-NEXT: packuswb %xmm2, %xmm1 11332; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 11333; SSE3-NEXT: movdqa %xmm1, %xmm0 11334; SSE3-NEXT: retq 11335; 11336; SSSE3-LABEL: ugt_16_v4i32: 11337; SSSE3: # %bb.0: 11338; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11339; SSSE3-NEXT: movdqa %xmm0, %xmm3 11340; SSSE3-NEXT: pand %xmm2, %xmm3 11341; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11342; SSSE3-NEXT: movdqa %xmm1, %xmm4 11343; SSSE3-NEXT: pshufb %xmm3, %xmm4 11344; SSSE3-NEXT: psrlw $4, %xmm0 11345; SSSE3-NEXT: pand %xmm2, %xmm0 11346; SSSE3-NEXT: pshufb %xmm0, %xmm1 11347; SSSE3-NEXT: paddb %xmm4, %xmm1 11348; SSSE3-NEXT: pxor %xmm0, %xmm0 11349; SSSE3-NEXT: movdqa %xmm1, %xmm2 11350; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11351; SSSE3-NEXT: psadbw %xmm0, %xmm2 11352; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11353; SSSE3-NEXT: psadbw %xmm0, %xmm1 11354; SSSE3-NEXT: packuswb %xmm2, %xmm1 11355; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 11356; SSSE3-NEXT: movdqa %xmm1, %xmm0 11357; SSSE3-NEXT: retq 11358; 11359; SSE41-LABEL: ugt_16_v4i32: 11360; SSE41: # %bb.0: 11361; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11362; SSE41-NEXT: movdqa %xmm0, %xmm2 11363; SSE41-NEXT: pand %xmm1, %xmm2 11364; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11365; SSE41-NEXT: movdqa %xmm3, %xmm4 11366; SSE41-NEXT: pshufb %xmm2, %xmm4 11367; SSE41-NEXT: psrlw $4, %xmm0 11368; SSE41-NEXT: pand %xmm1, %xmm0 11369; SSE41-NEXT: pshufb %xmm0, %xmm3 11370; SSE41-NEXT: paddb %xmm4, %xmm3 
11371; SSE41-NEXT: pxor %xmm1, %xmm1 11372; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 11373; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 11374; SSE41-NEXT: psadbw %xmm1, %xmm3 11375; SSE41-NEXT: psadbw %xmm1, %xmm0 11376; SSE41-NEXT: packuswb %xmm3, %xmm0 11377; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 11378; SSE41-NEXT: retq 11379; 11380; AVX1-LABEL: ugt_16_v4i32: 11381; AVX1: # %bb.0: 11382; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11383; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 11384; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11385; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11386; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 11387; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 11388; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11389; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11390; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 11391; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11392; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11393; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11394; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11395; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11396; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 11397; AVX1-NEXT: retq 11398; 11399; AVX2-LABEL: ugt_16_v4i32: 11400; AVX2: # %bb.0: 11401; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11402; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 11403; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11404; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11405; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 11406; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 11407; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11408; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11409; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 11410; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11411; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11412; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero 11413; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11414; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11415; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] 11416; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11417; AVX2-NEXT: retq 11418; 11419; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i32: 11420; AVX512VPOPCNTDQ: # %bb.0: 11421; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11422; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 11423; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] 11424; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11425; AVX512VPOPCNTDQ-NEXT: vzeroupper 11426; AVX512VPOPCNTDQ-NEXT: retq 11427; 11428; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i32: 11429; AVX512VPOPCNTDQVL: # %bb.0: 11430; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 11431; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 11432; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11433; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11434; AVX512VPOPCNTDQVL-NEXT: retq 11435; 11436; BITALG_NOVLX-LABEL: ugt_16_v4i32: 11437; BITALG_NOVLX: # %bb.0: 11438; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11439; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 11440; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 11441; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11442; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11443; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11444; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11445; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11446; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] 11447; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11448; BITALG_NOVLX-NEXT: vzeroupper 11449; BITALG_NOVLX-NEXT: retq 11450; 11451; BITALG-LABEL: ugt_16_v4i32: 11452; BITALG: # %bb.0: 11453; BITALG-NEXT: vpopcntb %xmm0, %xmm0 11454; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 11455; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11456; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11457; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11458; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11459; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11460; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 11461; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11462; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11463; BITALG-NEXT: retq 11464 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 11465 %3 = icmp ugt <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16> 11466 %4 = sext <4 x i1> %3 to <4 x i32> 11467 ret <4 x i32> %4 11468} 11469 11470define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { 11471; SSE2-LABEL: ult_17_v4i32: 11472; SSE2: # %bb.0: 11473; SSE2-NEXT: movdqa %xmm0, %xmm1 11474; SSE2-NEXT: psrlw $1, %xmm1 11475; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11476; SSE2-NEXT: psubb %xmm1, %xmm0 11477; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11478; SSE2-NEXT: movdqa %xmm0, %xmm2 11479; SSE2-NEXT: pand %xmm1, %xmm2 11480; SSE2-NEXT: psrlw $2, %xmm0 11481; SSE2-NEXT: pand %xmm1, %xmm0 11482; SSE2-NEXT: paddb %xmm2, %xmm0 11483; SSE2-NEXT: movdqa %xmm0, %xmm1 11484; SSE2-NEXT: psrlw $4, %xmm1 11485; SSE2-NEXT: paddb %xmm0, %xmm1 11486; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11487; SSE2-NEXT: pxor %xmm0, %xmm0 11488; SSE2-NEXT: movdqa %xmm1, %xmm2 11489; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11490; SSE2-NEXT: psadbw %xmm0, %xmm2 11491; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11492; SSE2-NEXT: psadbw %xmm0, %xmm1 11493; SSE2-NEXT: packuswb %xmm2, %xmm1 11494; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] 11495; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 11496; SSE2-NEXT: retq 11497; 11498; SSE3-LABEL: ult_17_v4i32: 11499; SSE3: # %bb.0: 11500; SSE3-NEXT: movdqa %xmm0, %xmm1 11501; SSE3-NEXT: psrlw $1, %xmm1 11502; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11503; SSE3-NEXT: psubb %xmm1, 
%xmm0 11504; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11505; SSE3-NEXT: movdqa %xmm0, %xmm2 11506; SSE3-NEXT: pand %xmm1, %xmm2 11507; SSE3-NEXT: psrlw $2, %xmm0 11508; SSE3-NEXT: pand %xmm1, %xmm0 11509; SSE3-NEXT: paddb %xmm2, %xmm0 11510; SSE3-NEXT: movdqa %xmm0, %xmm1 11511; SSE3-NEXT: psrlw $4, %xmm1 11512; SSE3-NEXT: paddb %xmm0, %xmm1 11513; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11514; SSE3-NEXT: pxor %xmm0, %xmm0 11515; SSE3-NEXT: movdqa %xmm1, %xmm2 11516; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11517; SSE3-NEXT: psadbw %xmm0, %xmm2 11518; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11519; SSE3-NEXT: psadbw %xmm0, %xmm1 11520; SSE3-NEXT: packuswb %xmm2, %xmm1 11521; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] 11522; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 11523; SSE3-NEXT: retq 11524; 11525; SSSE3-LABEL: ult_17_v4i32: 11526; SSSE3: # %bb.0: 11527; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11528; SSSE3-NEXT: movdqa %xmm0, %xmm2 11529; SSSE3-NEXT: pand %xmm1, %xmm2 11530; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11531; SSSE3-NEXT: movdqa %xmm3, %xmm4 11532; SSSE3-NEXT: pshufb %xmm2, %xmm4 11533; SSSE3-NEXT: psrlw $4, %xmm0 11534; SSSE3-NEXT: pand %xmm1, %xmm0 11535; SSSE3-NEXT: pshufb %xmm0, %xmm3 11536; SSSE3-NEXT: paddb %xmm4, %xmm3 11537; SSSE3-NEXT: pxor %xmm0, %xmm0 11538; SSSE3-NEXT: movdqa %xmm3, %xmm1 11539; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 11540; SSSE3-NEXT: psadbw %xmm0, %xmm1 11541; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 11542; SSSE3-NEXT: psadbw %xmm0, %xmm3 11543; SSSE3-NEXT: packuswb %xmm1, %xmm3 11544; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] 11545; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 11546; SSSE3-NEXT: retq 11547; 11548; SSE41-LABEL: ult_17_v4i32: 11549; SSE41: # %bb.0: 11550; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11551; SSE41-NEXT: movdqa %xmm0, %xmm2 11552; SSE41-NEXT: pand %xmm1, %xmm2 11553; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11554; SSE41-NEXT: movdqa %xmm3, %xmm4 11555; SSE41-NEXT: pshufb %xmm2, %xmm4 11556; SSE41-NEXT: psrlw $4, %xmm0 11557; SSE41-NEXT: pand %xmm1, %xmm0 11558; SSE41-NEXT: pshufb %xmm0, %xmm3 11559; SSE41-NEXT: paddb %xmm4, %xmm3 11560; SSE41-NEXT: pxor %xmm0, %xmm0 11561; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 11562; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 11563; SSE41-NEXT: psadbw %xmm0, %xmm3 11564; SSE41-NEXT: psadbw %xmm0, %xmm1 11565; SSE41-NEXT: packuswb %xmm3, %xmm1 11566; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] 11567; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 11568; SSE41-NEXT: retq 11569; 11570; AVX1-LABEL: ult_17_v4i32: 11571; AVX1: # %bb.0: 11572; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11573; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 11574; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11575; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11576; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 11577; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 11578; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11579; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11580; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 11581; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11582; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11583; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11584; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11585; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11586; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17] 11587; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11588; AVX1-NEXT: retq 11589; 11590; AVX2-LABEL: ult_17_v4i32: 11591; AVX2: # %bb.0: 11592; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11593; AVX2-NEXT: vpand %xmm1, %xmm0, 
%xmm2 11594; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11595; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11596; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 11597; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 11598; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11599; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11600; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 11601; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11602; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11603; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11604; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11605; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11606; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] 11607; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11608; AVX2-NEXT: retq 11609; 11610; AVX512VPOPCNTDQ-LABEL: ult_17_v4i32: 11611; AVX512VPOPCNTDQ: # %bb.0: 11612; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11613; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 11614; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] 11615; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11616; AVX512VPOPCNTDQ-NEXT: vzeroupper 11617; AVX512VPOPCNTDQ-NEXT: retq 11618; 11619; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i32: 11620; AVX512VPOPCNTDQVL: # %bb.0: 11621; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 11622; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 11623; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11624; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11625; AVX512VPOPCNTDQVL-NEXT: retq 11626; 11627; BITALG_NOVLX-LABEL: ult_17_v4i32: 11628; BITALG_NOVLX: # %bb.0: 11629; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11630; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 11631; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 11632; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11633; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11634; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero 11635; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11636; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11637; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] 11638; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11639; BITALG_NOVLX-NEXT: vzeroupper 11640; BITALG_NOVLX-NEXT: retq 11641; 11642; BITALG-LABEL: ult_17_v4i32: 11643; BITALG: # %bb.0: 11644; BITALG-NEXT: vpopcntb %xmm0, %xmm0 11645; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 11646; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11647; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11648; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11649; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11650; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11651; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 11652; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11653; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11654; BITALG-NEXT: retq 11655 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 11656 %3 = icmp ult <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17> 11657 %4 = sext <4 x i1> %3 to <4 x i32> 11658 ret <4 x i32> %4 11659} 11660 11661define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { 11662; SSE2-LABEL: ugt_17_v4i32: 11663; SSE2: # %bb.0: 11664; SSE2-NEXT: movdqa %xmm0, %xmm1 11665; SSE2-NEXT: psrlw $1, %xmm1 11666; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11667; SSE2-NEXT: psubb %xmm1, %xmm0 11668; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11669; SSE2-NEXT: movdqa %xmm0, %xmm2 11670; SSE2-NEXT: pand %xmm1, %xmm2 11671; SSE2-NEXT: psrlw $2, %xmm0 11672; SSE2-NEXT: pand %xmm1, %xmm0 11673; SSE2-NEXT: paddb %xmm2, %xmm0 11674; SSE2-NEXT: movdqa %xmm0, %xmm1 11675; SSE2-NEXT: psrlw $4, %xmm1 11676; SSE2-NEXT: paddb %xmm0, %xmm1 11677; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11678; SSE2-NEXT: pxor %xmm0, %xmm0 11679; SSE2-NEXT: movdqa %xmm1, %xmm2 11680; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = 
xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11681; SSE2-NEXT: psadbw %xmm0, %xmm2 11682; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11683; SSE2-NEXT: psadbw %xmm0, %xmm1 11684; SSE2-NEXT: packuswb %xmm2, %xmm1 11685; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 11686; SSE2-NEXT: movdqa %xmm1, %xmm0 11687; SSE2-NEXT: retq 11688; 11689; SSE3-LABEL: ugt_17_v4i32: 11690; SSE3: # %bb.0: 11691; SSE3-NEXT: movdqa %xmm0, %xmm1 11692; SSE3-NEXT: psrlw $1, %xmm1 11693; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11694; SSE3-NEXT: psubb %xmm1, %xmm0 11695; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11696; SSE3-NEXT: movdqa %xmm0, %xmm2 11697; SSE3-NEXT: pand %xmm1, %xmm2 11698; SSE3-NEXT: psrlw $2, %xmm0 11699; SSE3-NEXT: pand %xmm1, %xmm0 11700; SSE3-NEXT: paddb %xmm2, %xmm0 11701; SSE3-NEXT: movdqa %xmm0, %xmm1 11702; SSE3-NEXT: psrlw $4, %xmm1 11703; SSE3-NEXT: paddb %xmm0, %xmm1 11704; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11705; SSE3-NEXT: pxor %xmm0, %xmm0 11706; SSE3-NEXT: movdqa %xmm1, %xmm2 11707; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11708; SSE3-NEXT: psadbw %xmm0, %xmm2 11709; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11710; SSE3-NEXT: psadbw %xmm0, %xmm1 11711; SSE3-NEXT: packuswb %xmm2, %xmm1 11712; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 11713; SSE3-NEXT: movdqa %xmm1, %xmm0 11714; SSE3-NEXT: retq 11715; 11716; SSSE3-LABEL: ugt_17_v4i32: 11717; SSSE3: # %bb.0: 11718; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11719; SSSE3-NEXT: movdqa %xmm0, %xmm3 11720; SSSE3-NEXT: pand %xmm2, %xmm3 11721; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11722; SSSE3-NEXT: movdqa %xmm1, %xmm4 11723; SSSE3-NEXT: pshufb %xmm3, %xmm4 11724; SSSE3-NEXT: psrlw $4, %xmm0 11725; SSSE3-NEXT: pand %xmm2, %xmm0 11726; SSSE3-NEXT: pshufb %xmm0, %xmm1 11727; SSSE3-NEXT: paddb %xmm4, %xmm1 11728; SSSE3-NEXT: pxor %xmm0, %xmm0 11729; 
SSSE3-NEXT: movdqa %xmm1, %xmm2 11730; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11731; SSSE3-NEXT: psadbw %xmm0, %xmm2 11732; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11733; SSSE3-NEXT: psadbw %xmm0, %xmm1 11734; SSSE3-NEXT: packuswb %xmm2, %xmm1 11735; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 11736; SSSE3-NEXT: movdqa %xmm1, %xmm0 11737; SSSE3-NEXT: retq 11738; 11739; SSE41-LABEL: ugt_17_v4i32: 11740; SSE41: # %bb.0: 11741; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11742; SSE41-NEXT: movdqa %xmm0, %xmm2 11743; SSE41-NEXT: pand %xmm1, %xmm2 11744; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11745; SSE41-NEXT: movdqa %xmm3, %xmm4 11746; SSE41-NEXT: pshufb %xmm2, %xmm4 11747; SSE41-NEXT: psrlw $4, %xmm0 11748; SSE41-NEXT: pand %xmm1, %xmm0 11749; SSE41-NEXT: pshufb %xmm0, %xmm3 11750; SSE41-NEXT: paddb %xmm4, %xmm3 11751; SSE41-NEXT: pxor %xmm1, %xmm1 11752; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 11753; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 11754; SSE41-NEXT: psadbw %xmm1, %xmm3 11755; SSE41-NEXT: psadbw %xmm1, %xmm0 11756; SSE41-NEXT: packuswb %xmm3, %xmm0 11757; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 11758; SSE41-NEXT: retq 11759; 11760; AVX1-LABEL: ugt_17_v4i32: 11761; AVX1: # %bb.0: 11762; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11763; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 11764; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11765; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11766; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 11767; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 11768; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11769; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11770; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 11771; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11772; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11773; 
AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11774; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11775; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11776; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 11777; AVX1-NEXT: retq 11778; 11779; AVX2-LABEL: ugt_17_v4i32: 11780; AVX2: # %bb.0: 11781; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11782; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 11783; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11784; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11785; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 11786; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 11787; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11788; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11789; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 11790; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11791; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11792; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11793; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11794; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11795; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] 11796; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11797; AVX2-NEXT: retq 11798; 11799; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i32: 11800; AVX512VPOPCNTDQ: # %bb.0: 11801; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11802; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 11803; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] 11804; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11805; AVX512VPOPCNTDQ-NEXT: vzeroupper 11806; AVX512VPOPCNTDQ-NEXT: retq 11807; 11808; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i32: 11809; AVX512VPOPCNTDQVL: # %bb.0: 11810; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 11811; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 11812; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11813; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11814; AVX512VPOPCNTDQVL-NEXT: retq 11815; 11816; 
BITALG_NOVLX-LABEL: ugt_17_v4i32: 11817; BITALG_NOVLX: # %bb.0: 11818; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11819; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 11820; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 11821; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11822; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11823; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11824; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11825; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11826; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] 11827; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 11828; BITALG_NOVLX-NEXT: vzeroupper 11829; BITALG_NOVLX-NEXT: retq 11830; 11831; BITALG-LABEL: ugt_17_v4i32: 11832; BITALG: # %bb.0: 11833; BITALG-NEXT: vpopcntb %xmm0, %xmm0 11834; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 11835; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11836; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11837; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11838; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11839; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11840; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 11841; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 11842; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 11843; BITALG-NEXT: retq 11844 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 11845 %3 = icmp ugt <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17> 11846 %4 = sext <4 x i1> %3 to <4 x i32> 11847 ret <4 x i32> %4 11848} 11849 11850define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { 11851; SSE2-LABEL: ult_18_v4i32: 11852; SSE2: # %bb.0: 11853; SSE2-NEXT: movdqa %xmm0, %xmm1 11854; SSE2-NEXT: psrlw $1, %xmm1 11855; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11856; SSE2-NEXT: psubb %xmm1, %xmm0 11857; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11858; SSE2-NEXT: movdqa %xmm0, %xmm2 11859; 
SSE2-NEXT: pand %xmm1, %xmm2 11860; SSE2-NEXT: psrlw $2, %xmm0 11861; SSE2-NEXT: pand %xmm1, %xmm0 11862; SSE2-NEXT: paddb %xmm2, %xmm0 11863; SSE2-NEXT: movdqa %xmm0, %xmm1 11864; SSE2-NEXT: psrlw $4, %xmm1 11865; SSE2-NEXT: paddb %xmm0, %xmm1 11866; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 11867; SSE2-NEXT: pxor %xmm0, %xmm0 11868; SSE2-NEXT: movdqa %xmm1, %xmm2 11869; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11870; SSE2-NEXT: psadbw %xmm0, %xmm2 11871; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11872; SSE2-NEXT: psadbw %xmm0, %xmm1 11873; SSE2-NEXT: packuswb %xmm2, %xmm1 11874; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] 11875; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 11876; SSE2-NEXT: retq 11877; 11878; SSE3-LABEL: ult_18_v4i32: 11879; SSE3: # %bb.0: 11880; SSE3-NEXT: movdqa %xmm0, %xmm1 11881; SSE3-NEXT: psrlw $1, %xmm1 11882; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11883; SSE3-NEXT: psubb %xmm1, %xmm0 11884; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 11885; SSE3-NEXT: movdqa %xmm0, %xmm2 11886; SSE3-NEXT: pand %xmm1, %xmm2 11887; SSE3-NEXT: psrlw $2, %xmm0 11888; SSE3-NEXT: pand %xmm1, %xmm0 11889; SSE3-NEXT: paddb %xmm2, %xmm0 11890; SSE3-NEXT: movdqa %xmm0, %xmm1 11891; SSE3-NEXT: psrlw $4, %xmm1 11892; SSE3-NEXT: paddb %xmm0, %xmm1 11893; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 11894; SSE3-NEXT: pxor %xmm0, %xmm0 11895; SSE3-NEXT: movdqa %xmm1, %xmm2 11896; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 11897; SSE3-NEXT: psadbw %xmm0, %xmm2 11898; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 11899; SSE3-NEXT: psadbw %xmm0, %xmm1 11900; SSE3-NEXT: packuswb %xmm2, %xmm1 11901; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] 11902; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 11903; SSE3-NEXT: retq 11904; 11905; SSSE3-LABEL: ult_18_v4i32: 11906; SSSE3: # %bb.0: 11907; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 
11908; SSSE3-NEXT: movdqa %xmm0, %xmm2 11909; SSSE3-NEXT: pand %xmm1, %xmm2 11910; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11911; SSSE3-NEXT: movdqa %xmm3, %xmm4 11912; SSSE3-NEXT: pshufb %xmm2, %xmm4 11913; SSSE3-NEXT: psrlw $4, %xmm0 11914; SSSE3-NEXT: pand %xmm1, %xmm0 11915; SSSE3-NEXT: pshufb %xmm0, %xmm3 11916; SSSE3-NEXT: paddb %xmm4, %xmm3 11917; SSSE3-NEXT: pxor %xmm0, %xmm0 11918; SSSE3-NEXT: movdqa %xmm3, %xmm1 11919; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 11920; SSSE3-NEXT: psadbw %xmm0, %xmm1 11921; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 11922; SSSE3-NEXT: psadbw %xmm0, %xmm3 11923; SSSE3-NEXT: packuswb %xmm1, %xmm3 11924; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] 11925; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 11926; SSSE3-NEXT: retq 11927; 11928; SSE41-LABEL: ult_18_v4i32: 11929; SSE41: # %bb.0: 11930; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11931; SSE41-NEXT: movdqa %xmm0, %xmm2 11932; SSE41-NEXT: pand %xmm1, %xmm2 11933; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11934; SSE41-NEXT: movdqa %xmm3, %xmm4 11935; SSE41-NEXT: pshufb %xmm2, %xmm4 11936; SSE41-NEXT: psrlw $4, %xmm0 11937; SSE41-NEXT: pand %xmm1, %xmm0 11938; SSE41-NEXT: pshufb %xmm0, %xmm3 11939; SSE41-NEXT: paddb %xmm4, %xmm3 11940; SSE41-NEXT: pxor %xmm0, %xmm0 11941; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 11942; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 11943; SSE41-NEXT: psadbw %xmm0, %xmm3 11944; SSE41-NEXT: psadbw %xmm0, %xmm1 11945; SSE41-NEXT: packuswb %xmm3, %xmm1 11946; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] 11947; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 11948; SSE41-NEXT: retq 11949; 11950; AVX1-LABEL: ult_18_v4i32: 11951; AVX1: # %bb.0: 11952; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11953; AVX1-NEXT: vpand %xmm1, %xmm0, 
%xmm2 11954; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11955; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11956; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 11957; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 11958; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11959; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11960; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 11961; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11962; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11963; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11964; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11965; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11966; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18] 11967; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11968; AVX1-NEXT: retq 11969; 11970; AVX2-LABEL: ult_18_v4i32: 11971; AVX2: # %bb.0: 11972; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 11973; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 11974; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 11975; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 11976; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 11977; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 11978; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 11979; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 11980; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 11981; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 11982; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 11983; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 11984; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 11985; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 11986; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] 11987; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11988; AVX2-NEXT: retq 11989; 11990; AVX512VPOPCNTDQ-LABEL: ult_18_v4i32: 11991; AVX512VPOPCNTDQ: # %bb.0: 11992; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11993; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 11994; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] 
11995; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 11996; AVX512VPOPCNTDQ-NEXT: vzeroupper 11997; AVX512VPOPCNTDQ-NEXT: retq 11998; 11999; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i32: 12000; AVX512VPOPCNTDQVL: # %bb.0: 12001; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 12002; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 12003; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12004; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12005; AVX512VPOPCNTDQVL-NEXT: retq 12006; 12007; BITALG_NOVLX-LABEL: ult_18_v4i32: 12008; BITALG_NOVLX: # %bb.0: 12009; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12010; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 12011; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 12012; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12013; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12014; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12015; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12016; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12017; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] 12018; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12019; BITALG_NOVLX-NEXT: vzeroupper 12020; BITALG_NOVLX-NEXT: retq 12021; 12022; BITALG-LABEL: ult_18_v4i32: 12023; BITALG: # %bb.0: 12024; BITALG-NEXT: vpopcntb %xmm0, %xmm0 12025; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 12026; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12027; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12028; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12029; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12030; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12031; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 12032; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12033; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12034; BITALG-NEXT: retq 12035 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 12036 %3 = icmp ult <4 x 
i32> %2, <i32 18, i32 18, i32 18, i32 18> 12037 %4 = sext <4 x i1> %3 to <4 x i32> 12038 ret <4 x i32> %4 12039} 12040 12041define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { 12042; SSE2-LABEL: ugt_18_v4i32: 12043; SSE2: # %bb.0: 12044; SSE2-NEXT: movdqa %xmm0, %xmm1 12045; SSE2-NEXT: psrlw $1, %xmm1 12046; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12047; SSE2-NEXT: psubb %xmm1, %xmm0 12048; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12049; SSE2-NEXT: movdqa %xmm0, %xmm2 12050; SSE2-NEXT: pand %xmm1, %xmm2 12051; SSE2-NEXT: psrlw $2, %xmm0 12052; SSE2-NEXT: pand %xmm1, %xmm0 12053; SSE2-NEXT: paddb %xmm2, %xmm0 12054; SSE2-NEXT: movdqa %xmm0, %xmm1 12055; SSE2-NEXT: psrlw $4, %xmm1 12056; SSE2-NEXT: paddb %xmm0, %xmm1 12057; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12058; SSE2-NEXT: pxor %xmm0, %xmm0 12059; SSE2-NEXT: movdqa %xmm1, %xmm2 12060; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12061; SSE2-NEXT: psadbw %xmm0, %xmm2 12062; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12063; SSE2-NEXT: psadbw %xmm0, %xmm1 12064; SSE2-NEXT: packuswb %xmm2, %xmm1 12065; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12066; SSE2-NEXT: movdqa %xmm1, %xmm0 12067; SSE2-NEXT: retq 12068; 12069; SSE3-LABEL: ugt_18_v4i32: 12070; SSE3: # %bb.0: 12071; SSE3-NEXT: movdqa %xmm0, %xmm1 12072; SSE3-NEXT: psrlw $1, %xmm1 12073; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12074; SSE3-NEXT: psubb %xmm1, %xmm0 12075; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12076; SSE3-NEXT: movdqa %xmm0, %xmm2 12077; SSE3-NEXT: pand %xmm1, %xmm2 12078; SSE3-NEXT: psrlw $2, %xmm0 12079; SSE3-NEXT: pand %xmm1, %xmm0 12080; SSE3-NEXT: paddb %xmm2, %xmm0 12081; SSE3-NEXT: movdqa %xmm0, %xmm1 12082; SSE3-NEXT: psrlw $4, %xmm1 12083; SSE3-NEXT: paddb %xmm0, %xmm1 12084; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12085; SSE3-NEXT: pxor %xmm0, %xmm0 12086; SSE3-NEXT: movdqa %xmm1, %xmm2 12087; SSE3-NEXT: punpckhdq {{.*#+}} 
xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12088; SSE3-NEXT: psadbw %xmm0, %xmm2 12089; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12090; SSE3-NEXT: psadbw %xmm0, %xmm1 12091; SSE3-NEXT: packuswb %xmm2, %xmm1 12092; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12093; SSE3-NEXT: movdqa %xmm1, %xmm0 12094; SSE3-NEXT: retq 12095; 12096; SSSE3-LABEL: ugt_18_v4i32: 12097; SSSE3: # %bb.0: 12098; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12099; SSSE3-NEXT: movdqa %xmm0, %xmm3 12100; SSSE3-NEXT: pand %xmm2, %xmm3 12101; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12102; SSSE3-NEXT: movdqa %xmm1, %xmm4 12103; SSSE3-NEXT: pshufb %xmm3, %xmm4 12104; SSSE3-NEXT: psrlw $4, %xmm0 12105; SSSE3-NEXT: pand %xmm2, %xmm0 12106; SSSE3-NEXT: pshufb %xmm0, %xmm1 12107; SSSE3-NEXT: paddb %xmm4, %xmm1 12108; SSSE3-NEXT: pxor %xmm0, %xmm0 12109; SSSE3-NEXT: movdqa %xmm1, %xmm2 12110; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12111; SSSE3-NEXT: psadbw %xmm0, %xmm2 12112; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12113; SSSE3-NEXT: psadbw %xmm0, %xmm1 12114; SSSE3-NEXT: packuswb %xmm2, %xmm1 12115; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12116; SSSE3-NEXT: movdqa %xmm1, %xmm0 12117; SSSE3-NEXT: retq 12118; 12119; SSE41-LABEL: ugt_18_v4i32: 12120; SSE41: # %bb.0: 12121; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12122; SSE41-NEXT: movdqa %xmm0, %xmm2 12123; SSE41-NEXT: pand %xmm1, %xmm2 12124; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12125; SSE41-NEXT: movdqa %xmm3, %xmm4 12126; SSE41-NEXT: pshufb %xmm2, %xmm4 12127; SSE41-NEXT: psrlw $4, %xmm0 12128; SSE41-NEXT: pand %xmm1, %xmm0 12129; SSE41-NEXT: pshufb %xmm0, %xmm3 12130; SSE41-NEXT: paddb %xmm4, %xmm3 12131; SSE41-NEXT: pxor %xmm1, %xmm1 12132; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 12133; SSE41-NEXT: 
punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 12134; SSE41-NEXT: psadbw %xmm1, %xmm3 12135; SSE41-NEXT: psadbw %xmm1, %xmm0 12136; SSE41-NEXT: packuswb %xmm3, %xmm0 12137; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 12138; SSE41-NEXT: retq 12139; 12140; AVX1-LABEL: ugt_18_v4i32: 12141; AVX1: # %bb.0: 12142; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12143; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 12144; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12145; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12146; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 12147; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 12148; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12149; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12150; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 12151; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12152; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12153; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12154; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12155; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12156; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 12157; AVX1-NEXT: retq 12158; 12159; AVX2-LABEL: ugt_18_v4i32: 12160; AVX2: # %bb.0: 12161; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12162; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 12163; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12164; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12165; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 12166; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 12167; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12168; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12169; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 12170; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12171; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12172; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12173; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12174; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12175; AVX2-NEXT: 
vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] 12176; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12177; AVX2-NEXT: retq 12178; 12179; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i32: 12180; AVX512VPOPCNTDQ: # %bb.0: 12181; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12182; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 12183; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] 12184; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12185; AVX512VPOPCNTDQ-NEXT: vzeroupper 12186; AVX512VPOPCNTDQ-NEXT: retq 12187; 12188; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i32: 12189; AVX512VPOPCNTDQVL: # %bb.0: 12190; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 12191; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 12192; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12193; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12194; AVX512VPOPCNTDQVL-NEXT: retq 12195; 12196; BITALG_NOVLX-LABEL: ugt_18_v4i32: 12197; BITALG_NOVLX: # %bb.0: 12198; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12199; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 12200; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 12201; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12202; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12203; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12204; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12205; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12206; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] 12207; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12208; BITALG_NOVLX-NEXT: vzeroupper 12209; BITALG_NOVLX-NEXT: retq 12210; 12211; BITALG-LABEL: ugt_18_v4i32: 12212; BITALG: # %bb.0: 12213; BITALG-NEXT: vpopcntb %xmm0, %xmm0 12214; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 12215; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12216; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12217; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero 12218; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12219; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12220; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 12221; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12222; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12223; BITALG-NEXT: retq 12224 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 12225 %3 = icmp ugt <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18> 12226 %4 = sext <4 x i1> %3 to <4 x i32> 12227 ret <4 x i32> %4 12228} 12229 12230define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { 12231; SSE2-LABEL: ult_19_v4i32: 12232; SSE2: # %bb.0: 12233; SSE2-NEXT: movdqa %xmm0, %xmm1 12234; SSE2-NEXT: psrlw $1, %xmm1 12235; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12236; SSE2-NEXT: psubb %xmm1, %xmm0 12237; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12238; SSE2-NEXT: movdqa %xmm0, %xmm2 12239; SSE2-NEXT: pand %xmm1, %xmm2 12240; SSE2-NEXT: psrlw $2, %xmm0 12241; SSE2-NEXT: pand %xmm1, %xmm0 12242; SSE2-NEXT: paddb %xmm2, %xmm0 12243; SSE2-NEXT: movdqa %xmm0, %xmm1 12244; SSE2-NEXT: psrlw $4, %xmm1 12245; SSE2-NEXT: paddb %xmm0, %xmm1 12246; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12247; SSE2-NEXT: pxor %xmm0, %xmm0 12248; SSE2-NEXT: movdqa %xmm1, %xmm2 12249; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12250; SSE2-NEXT: psadbw %xmm0, %xmm2 12251; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12252; SSE2-NEXT: psadbw %xmm0, %xmm1 12253; SSE2-NEXT: packuswb %xmm2, %xmm1 12254; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] 12255; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 12256; SSE2-NEXT: retq 12257; 12258; SSE3-LABEL: ult_19_v4i32: 12259; SSE3: # %bb.0: 12260; SSE3-NEXT: movdqa %xmm0, %xmm1 12261; SSE3-NEXT: psrlw $1, %xmm1 12262; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12263; SSE3-NEXT: psubb %xmm1, %xmm0 12264; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12265; SSE3-NEXT: movdqa %xmm0, 
%xmm2 12266; SSE3-NEXT: pand %xmm1, %xmm2 12267; SSE3-NEXT: psrlw $2, %xmm0 12268; SSE3-NEXT: pand %xmm1, %xmm0 12269; SSE3-NEXT: paddb %xmm2, %xmm0 12270; SSE3-NEXT: movdqa %xmm0, %xmm1 12271; SSE3-NEXT: psrlw $4, %xmm1 12272; SSE3-NEXT: paddb %xmm0, %xmm1 12273; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12274; SSE3-NEXT: pxor %xmm0, %xmm0 12275; SSE3-NEXT: movdqa %xmm1, %xmm2 12276; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12277; SSE3-NEXT: psadbw %xmm0, %xmm2 12278; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12279; SSE3-NEXT: psadbw %xmm0, %xmm1 12280; SSE3-NEXT: packuswb %xmm2, %xmm1 12281; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] 12282; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 12283; SSE3-NEXT: retq 12284; 12285; SSSE3-LABEL: ult_19_v4i32: 12286; SSSE3: # %bb.0: 12287; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12288; SSSE3-NEXT: movdqa %xmm0, %xmm2 12289; SSSE3-NEXT: pand %xmm1, %xmm2 12290; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12291; SSSE3-NEXT: movdqa %xmm3, %xmm4 12292; SSSE3-NEXT: pshufb %xmm2, %xmm4 12293; SSSE3-NEXT: psrlw $4, %xmm0 12294; SSSE3-NEXT: pand %xmm1, %xmm0 12295; SSSE3-NEXT: pshufb %xmm0, %xmm3 12296; SSSE3-NEXT: paddb %xmm4, %xmm3 12297; SSSE3-NEXT: pxor %xmm0, %xmm0 12298; SSSE3-NEXT: movdqa %xmm3, %xmm1 12299; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 12300; SSSE3-NEXT: psadbw %xmm0, %xmm1 12301; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 12302; SSSE3-NEXT: psadbw %xmm0, %xmm3 12303; SSSE3-NEXT: packuswb %xmm1, %xmm3 12304; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] 12305; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 12306; SSSE3-NEXT: retq 12307; 12308; SSE41-LABEL: ult_19_v4i32: 12309; SSE41: # %bb.0: 12310; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12311; SSE41-NEXT: movdqa %xmm0, %xmm2 12312; SSE41-NEXT: pand %xmm1, %xmm2 
12313; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12314; SSE41-NEXT: movdqa %xmm3, %xmm4 12315; SSE41-NEXT: pshufb %xmm2, %xmm4 12316; SSE41-NEXT: psrlw $4, %xmm0 12317; SSE41-NEXT: pand %xmm1, %xmm0 12318; SSE41-NEXT: pshufb %xmm0, %xmm3 12319; SSE41-NEXT: paddb %xmm4, %xmm3 12320; SSE41-NEXT: pxor %xmm0, %xmm0 12321; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 12322; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 12323; SSE41-NEXT: psadbw %xmm0, %xmm3 12324; SSE41-NEXT: psadbw %xmm0, %xmm1 12325; SSE41-NEXT: packuswb %xmm3, %xmm1 12326; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] 12327; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 12328; SSE41-NEXT: retq 12329; 12330; AVX1-LABEL: ult_19_v4i32: 12331; AVX1: # %bb.0: 12332; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12333; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 12334; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12335; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12336; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 12337; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 12338; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12339; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12340; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 12341; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12342; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12343; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12344; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12345; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12346; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19] 12347; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12348; AVX1-NEXT: retq 12349; 12350; AVX2-LABEL: ult_19_v4i32: 12351; AVX2: # %bb.0: 12352; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12353; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 12354; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12355; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 
12356; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 12357; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 12358; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12359; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12360; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 12361; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12362; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12363; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12364; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12365; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12366; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] 12367; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12368; AVX2-NEXT: retq 12369; 12370; AVX512VPOPCNTDQ-LABEL: ult_19_v4i32: 12371; AVX512VPOPCNTDQ: # %bb.0: 12372; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12373; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 12374; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] 12375; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12376; AVX512VPOPCNTDQ-NEXT: vzeroupper 12377; AVX512VPOPCNTDQ-NEXT: retq 12378; 12379; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i32: 12380; AVX512VPOPCNTDQVL: # %bb.0: 12381; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 12382; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 12383; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12384; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12385; AVX512VPOPCNTDQVL-NEXT: retq 12386; 12387; BITALG_NOVLX-LABEL: ult_19_v4i32: 12388; BITALG_NOVLX: # %bb.0: 12389; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12390; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 12391; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 12392; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12393; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12394; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12395; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12396; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12397; 
BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] 12398; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12399; BITALG_NOVLX-NEXT: vzeroupper 12400; BITALG_NOVLX-NEXT: retq 12401; 12402; BITALG-LABEL: ult_19_v4i32: 12403; BITALG: # %bb.0: 12404; BITALG-NEXT: vpopcntb %xmm0, %xmm0 12405; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 12406; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12407; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12408; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12409; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12410; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12411; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 12412; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12413; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12414; BITALG-NEXT: retq 12415 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 12416 %3 = icmp ult <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19> 12417 %4 = sext <4 x i1> %3 to <4 x i32> 12418 ret <4 x i32> %4 12419} 12420 12421define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { 12422; SSE2-LABEL: ugt_19_v4i32: 12423; SSE2: # %bb.0: 12424; SSE2-NEXT: movdqa %xmm0, %xmm1 12425; SSE2-NEXT: psrlw $1, %xmm1 12426; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12427; SSE2-NEXT: psubb %xmm1, %xmm0 12428; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12429; SSE2-NEXT: movdqa %xmm0, %xmm2 12430; SSE2-NEXT: pand %xmm1, %xmm2 12431; SSE2-NEXT: psrlw $2, %xmm0 12432; SSE2-NEXT: pand %xmm1, %xmm0 12433; SSE2-NEXT: paddb %xmm2, %xmm0 12434; SSE2-NEXT: movdqa %xmm0, %xmm1 12435; SSE2-NEXT: psrlw $4, %xmm1 12436; SSE2-NEXT: paddb %xmm0, %xmm1 12437; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12438; SSE2-NEXT: pxor %xmm0, %xmm0 12439; SSE2-NEXT: movdqa %xmm1, %xmm2 12440; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12441; SSE2-NEXT: psadbw %xmm0, %xmm2 12442; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12443; SSE2-NEXT: 
psadbw %xmm0, %xmm1 12444; SSE2-NEXT: packuswb %xmm2, %xmm1 12445; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12446; SSE2-NEXT: movdqa %xmm1, %xmm0 12447; SSE2-NEXT: retq 12448; 12449; SSE3-LABEL: ugt_19_v4i32: 12450; SSE3: # %bb.0: 12451; SSE3-NEXT: movdqa %xmm0, %xmm1 12452; SSE3-NEXT: psrlw $1, %xmm1 12453; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12454; SSE3-NEXT: psubb %xmm1, %xmm0 12455; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12456; SSE3-NEXT: movdqa %xmm0, %xmm2 12457; SSE3-NEXT: pand %xmm1, %xmm2 12458; SSE3-NEXT: psrlw $2, %xmm0 12459; SSE3-NEXT: pand %xmm1, %xmm0 12460; SSE3-NEXT: paddb %xmm2, %xmm0 12461; SSE3-NEXT: movdqa %xmm0, %xmm1 12462; SSE3-NEXT: psrlw $4, %xmm1 12463; SSE3-NEXT: paddb %xmm0, %xmm1 12464; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12465; SSE3-NEXT: pxor %xmm0, %xmm0 12466; SSE3-NEXT: movdqa %xmm1, %xmm2 12467; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12468; SSE3-NEXT: psadbw %xmm0, %xmm2 12469; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12470; SSE3-NEXT: psadbw %xmm0, %xmm1 12471; SSE3-NEXT: packuswb %xmm2, %xmm1 12472; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12473; SSE3-NEXT: movdqa %xmm1, %xmm0 12474; SSE3-NEXT: retq 12475; 12476; SSSE3-LABEL: ugt_19_v4i32: 12477; SSSE3: # %bb.0: 12478; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12479; SSSE3-NEXT: movdqa %xmm0, %xmm3 12480; SSSE3-NEXT: pand %xmm2, %xmm3 12481; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12482; SSSE3-NEXT: movdqa %xmm1, %xmm4 12483; SSSE3-NEXT: pshufb %xmm3, %xmm4 12484; SSSE3-NEXT: psrlw $4, %xmm0 12485; SSSE3-NEXT: pand %xmm2, %xmm0 12486; SSSE3-NEXT: pshufb %xmm0, %xmm1 12487; SSSE3-NEXT: paddb %xmm4, %xmm1 12488; SSSE3-NEXT: pxor %xmm0, %xmm0 12489; SSSE3-NEXT: movdqa %xmm1, %xmm2 12490; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12491; SSSE3-NEXT: psadbw %xmm0, %xmm2 12492; SSSE3-NEXT: 
punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12493; SSSE3-NEXT: psadbw %xmm0, %xmm1 12494; SSSE3-NEXT: packuswb %xmm2, %xmm1 12495; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12496; SSSE3-NEXT: movdqa %xmm1, %xmm0 12497; SSSE3-NEXT: retq 12498; 12499; SSE41-LABEL: ugt_19_v4i32: 12500; SSE41: # %bb.0: 12501; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12502; SSE41-NEXT: movdqa %xmm0, %xmm2 12503; SSE41-NEXT: pand %xmm1, %xmm2 12504; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12505; SSE41-NEXT: movdqa %xmm3, %xmm4 12506; SSE41-NEXT: pshufb %xmm2, %xmm4 12507; SSE41-NEXT: psrlw $4, %xmm0 12508; SSE41-NEXT: pand %xmm1, %xmm0 12509; SSE41-NEXT: pshufb %xmm0, %xmm3 12510; SSE41-NEXT: paddb %xmm4, %xmm3 12511; SSE41-NEXT: pxor %xmm1, %xmm1 12512; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 12513; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 12514; SSE41-NEXT: psadbw %xmm1, %xmm3 12515; SSE41-NEXT: psadbw %xmm1, %xmm0 12516; SSE41-NEXT: packuswb %xmm3, %xmm0 12517; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 12518; SSE41-NEXT: retq 12519; 12520; AVX1-LABEL: ugt_19_v4i32: 12521; AVX1: # %bb.0: 12522; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12523; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 12524; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12525; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12526; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 12527; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 12528; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12529; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12530; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 12531; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12532; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12533; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12534; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12535; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12536; 
AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 12537; AVX1-NEXT: retq 12538; 12539; AVX2-LABEL: ugt_19_v4i32: 12540; AVX2: # %bb.0: 12541; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12542; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 12543; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12544; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12545; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 12546; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 12547; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12548; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12549; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 12550; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12551; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12552; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12553; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12554; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12555; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] 12556; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12557; AVX2-NEXT: retq 12558; 12559; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i32: 12560; AVX512VPOPCNTDQ: # %bb.0: 12561; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12562; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 12563; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] 12564; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12565; AVX512VPOPCNTDQ-NEXT: vzeroupper 12566; AVX512VPOPCNTDQ-NEXT: retq 12567; 12568; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i32: 12569; AVX512VPOPCNTDQVL: # %bb.0: 12570; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 12571; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 12572; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12573; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12574; AVX512VPOPCNTDQVL-NEXT: retq 12575; 12576; BITALG_NOVLX-LABEL: ugt_19_v4i32: 12577; BITALG_NOVLX: # %bb.0: 12578; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12579; BITALG_NOVLX-NEXT: vpopcntb %zmm0, 
%zmm0 12580; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 12581; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12582; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12583; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12584; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12585; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12586; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] 12587; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12588; BITALG_NOVLX-NEXT: vzeroupper 12589; BITALG_NOVLX-NEXT: retq 12590; 12591; BITALG-LABEL: ugt_19_v4i32: 12592; BITALG: # %bb.0: 12593; BITALG-NEXT: vpopcntb %xmm0, %xmm0 12594; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 12595; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12596; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12597; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12598; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12599; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12600; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 12601; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12602; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12603; BITALG-NEXT: retq 12604 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 12605 %3 = icmp ugt <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19> 12606 %4 = sext <4 x i1> %3 to <4 x i32> 12607 ret <4 x i32> %4 12608} 12609 12610define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { 12611; SSE2-LABEL: ult_20_v4i32: 12612; SSE2: # %bb.0: 12613; SSE2-NEXT: movdqa %xmm0, %xmm1 12614; SSE2-NEXT: psrlw $1, %xmm1 12615; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12616; SSE2-NEXT: psubb %xmm1, %xmm0 12617; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12618; SSE2-NEXT: movdqa %xmm0, %xmm2 12619; SSE2-NEXT: pand %xmm1, %xmm2 12620; SSE2-NEXT: psrlw $2, %xmm0 12621; SSE2-NEXT: pand %xmm1, %xmm0 12622; SSE2-NEXT: paddb %xmm2, %xmm0 12623; SSE2-NEXT: movdqa %xmm0, %xmm1 12624; 
SSE2-NEXT: psrlw $4, %xmm1 12625; SSE2-NEXT: paddb %xmm0, %xmm1 12626; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12627; SSE2-NEXT: pxor %xmm0, %xmm0 12628; SSE2-NEXT: movdqa %xmm1, %xmm2 12629; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12630; SSE2-NEXT: psadbw %xmm0, %xmm2 12631; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12632; SSE2-NEXT: psadbw %xmm0, %xmm1 12633; SSE2-NEXT: packuswb %xmm2, %xmm1 12634; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] 12635; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 12636; SSE2-NEXT: retq 12637; 12638; SSE3-LABEL: ult_20_v4i32: 12639; SSE3: # %bb.0: 12640; SSE3-NEXT: movdqa %xmm0, %xmm1 12641; SSE3-NEXT: psrlw $1, %xmm1 12642; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12643; SSE3-NEXT: psubb %xmm1, %xmm0 12644; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12645; SSE3-NEXT: movdqa %xmm0, %xmm2 12646; SSE3-NEXT: pand %xmm1, %xmm2 12647; SSE3-NEXT: psrlw $2, %xmm0 12648; SSE3-NEXT: pand %xmm1, %xmm0 12649; SSE3-NEXT: paddb %xmm2, %xmm0 12650; SSE3-NEXT: movdqa %xmm0, %xmm1 12651; SSE3-NEXT: psrlw $4, %xmm1 12652; SSE3-NEXT: paddb %xmm0, %xmm1 12653; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12654; SSE3-NEXT: pxor %xmm0, %xmm0 12655; SSE3-NEXT: movdqa %xmm1, %xmm2 12656; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12657; SSE3-NEXT: psadbw %xmm0, %xmm2 12658; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12659; SSE3-NEXT: psadbw %xmm0, %xmm1 12660; SSE3-NEXT: packuswb %xmm2, %xmm1 12661; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] 12662; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 12663; SSE3-NEXT: retq 12664; 12665; SSSE3-LABEL: ult_20_v4i32: 12666; SSSE3: # %bb.0: 12667; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12668; SSSE3-NEXT: movdqa %xmm0, %xmm2 12669; SSSE3-NEXT: pand %xmm1, %xmm2 12670; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12671; SSSE3-NEXT: movdqa 
%xmm3, %xmm4 12672; SSSE3-NEXT: pshufb %xmm2, %xmm4 12673; SSSE3-NEXT: psrlw $4, %xmm0 12674; SSSE3-NEXT: pand %xmm1, %xmm0 12675; SSSE3-NEXT: pshufb %xmm0, %xmm3 12676; SSSE3-NEXT: paddb %xmm4, %xmm3 12677; SSSE3-NEXT: pxor %xmm0, %xmm0 12678; SSSE3-NEXT: movdqa %xmm3, %xmm1 12679; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 12680; SSSE3-NEXT: psadbw %xmm0, %xmm1 12681; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 12682; SSSE3-NEXT: psadbw %xmm0, %xmm3 12683; SSSE3-NEXT: packuswb %xmm1, %xmm3 12684; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] 12685; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 12686; SSSE3-NEXT: retq 12687; 12688; SSE41-LABEL: ult_20_v4i32: 12689; SSE41: # %bb.0: 12690; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12691; SSE41-NEXT: movdqa %xmm0, %xmm2 12692; SSE41-NEXT: pand %xmm1, %xmm2 12693; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12694; SSE41-NEXT: movdqa %xmm3, %xmm4 12695; SSE41-NEXT: pshufb %xmm2, %xmm4 12696; SSE41-NEXT: psrlw $4, %xmm0 12697; SSE41-NEXT: pand %xmm1, %xmm0 12698; SSE41-NEXT: pshufb %xmm0, %xmm3 12699; SSE41-NEXT: paddb %xmm4, %xmm3 12700; SSE41-NEXT: pxor %xmm0, %xmm0 12701; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 12702; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 12703; SSE41-NEXT: psadbw %xmm0, %xmm3 12704; SSE41-NEXT: psadbw %xmm0, %xmm1 12705; SSE41-NEXT: packuswb %xmm3, %xmm1 12706; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] 12707; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 12708; SSE41-NEXT: retq 12709; 12710; AVX1-LABEL: ult_20_v4i32: 12711; AVX1: # %bb.0: 12712; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12713; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 12714; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12715; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12716; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 12717; 
AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 12718; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12719; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12720; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 12721; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12722; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12723; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12724; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12725; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12726; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20] 12727; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12728; AVX1-NEXT: retq 12729; 12730; AVX2-LABEL: ult_20_v4i32: 12731; AVX2: # %bb.0: 12732; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12733; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 12734; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12735; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12736; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 12737; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 12738; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12739; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12740; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 12741; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12742; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12743; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12744; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12745; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12746; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] 12747; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12748; AVX2-NEXT: retq 12749; 12750; AVX512VPOPCNTDQ-LABEL: ult_20_v4i32: 12751; AVX512VPOPCNTDQ: # %bb.0: 12752; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12753; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 12754; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] 12755; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12756; AVX512VPOPCNTDQ-NEXT: vzeroupper 12757; AVX512VPOPCNTDQ-NEXT: retq 12758; 12759; AVX512VPOPCNTDQVL-LABEL: 
ult_20_v4i32: 12760; AVX512VPOPCNTDQVL: # %bb.0: 12761; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 12762; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 12763; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12764; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12765; AVX512VPOPCNTDQVL-NEXT: retq 12766; 12767; BITALG_NOVLX-LABEL: ult_20_v4i32: 12768; BITALG_NOVLX: # %bb.0: 12769; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12770; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 12771; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 12772; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12773; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12774; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12775; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12776; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12777; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] 12778; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 12779; BITALG_NOVLX-NEXT: vzeroupper 12780; BITALG_NOVLX-NEXT: retq 12781; 12782; BITALG-LABEL: ult_20_v4i32: 12783; BITALG: # %bb.0: 12784; BITALG-NEXT: vpopcntb %xmm0, %xmm0 12785; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 12786; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12787; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12788; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12789; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12790; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12791; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 12792; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12793; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12794; BITALG-NEXT: retq 12795 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 12796 %3 = icmp ult <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20> 12797 %4 = sext <4 x i1> %3 to <4 x i32> 12798 ret <4 x i32> %4 12799} 12800 12801define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { 
12802; SSE2-LABEL: ugt_20_v4i32: 12803; SSE2: # %bb.0: 12804; SSE2-NEXT: movdqa %xmm0, %xmm1 12805; SSE2-NEXT: psrlw $1, %xmm1 12806; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12807; SSE2-NEXT: psubb %xmm1, %xmm0 12808; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12809; SSE2-NEXT: movdqa %xmm0, %xmm2 12810; SSE2-NEXT: pand %xmm1, %xmm2 12811; SSE2-NEXT: psrlw $2, %xmm0 12812; SSE2-NEXT: pand %xmm1, %xmm0 12813; SSE2-NEXT: paddb %xmm2, %xmm0 12814; SSE2-NEXT: movdqa %xmm0, %xmm1 12815; SSE2-NEXT: psrlw $4, %xmm1 12816; SSE2-NEXT: paddb %xmm0, %xmm1 12817; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12818; SSE2-NEXT: pxor %xmm0, %xmm0 12819; SSE2-NEXT: movdqa %xmm1, %xmm2 12820; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12821; SSE2-NEXT: psadbw %xmm0, %xmm2 12822; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12823; SSE2-NEXT: psadbw %xmm0, %xmm1 12824; SSE2-NEXT: packuswb %xmm2, %xmm1 12825; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12826; SSE2-NEXT: movdqa %xmm1, %xmm0 12827; SSE2-NEXT: retq 12828; 12829; SSE3-LABEL: ugt_20_v4i32: 12830; SSE3: # %bb.0: 12831; SSE3-NEXT: movdqa %xmm0, %xmm1 12832; SSE3-NEXT: psrlw $1, %xmm1 12833; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12834; SSE3-NEXT: psubb %xmm1, %xmm0 12835; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12836; SSE3-NEXT: movdqa %xmm0, %xmm2 12837; SSE3-NEXT: pand %xmm1, %xmm2 12838; SSE3-NEXT: psrlw $2, %xmm0 12839; SSE3-NEXT: pand %xmm1, %xmm0 12840; SSE3-NEXT: paddb %xmm2, %xmm0 12841; SSE3-NEXT: movdqa %xmm0, %xmm1 12842; SSE3-NEXT: psrlw $4, %xmm1 12843; SSE3-NEXT: paddb %xmm0, %xmm1 12844; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 12845; SSE3-NEXT: pxor %xmm0, %xmm0 12846; SSE3-NEXT: movdqa %xmm1, %xmm2 12847; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12848; SSE3-NEXT: psadbw %xmm0, %xmm2 12849; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12850; SSE3-NEXT: 
psadbw %xmm0, %xmm1 12851; SSE3-NEXT: packuswb %xmm2, %xmm1 12852; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12853; SSE3-NEXT: movdqa %xmm1, %xmm0 12854; SSE3-NEXT: retq 12855; 12856; SSSE3-LABEL: ugt_20_v4i32: 12857; SSSE3: # %bb.0: 12858; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12859; SSSE3-NEXT: movdqa %xmm0, %xmm3 12860; SSSE3-NEXT: pand %xmm2, %xmm3 12861; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12862; SSSE3-NEXT: movdqa %xmm1, %xmm4 12863; SSSE3-NEXT: pshufb %xmm3, %xmm4 12864; SSSE3-NEXT: psrlw $4, %xmm0 12865; SSSE3-NEXT: pand %xmm2, %xmm0 12866; SSSE3-NEXT: pshufb %xmm0, %xmm1 12867; SSSE3-NEXT: paddb %xmm4, %xmm1 12868; SSSE3-NEXT: pxor %xmm0, %xmm0 12869; SSSE3-NEXT: movdqa %xmm1, %xmm2 12870; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 12871; SSSE3-NEXT: psadbw %xmm0, %xmm2 12872; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 12873; SSSE3-NEXT: psadbw %xmm0, %xmm1 12874; SSSE3-NEXT: packuswb %xmm2, %xmm1 12875; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 12876; SSSE3-NEXT: movdqa %xmm1, %xmm0 12877; SSSE3-NEXT: retq 12878; 12879; SSE41-LABEL: ugt_20_v4i32: 12880; SSE41: # %bb.0: 12881; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12882; SSE41-NEXT: movdqa %xmm0, %xmm2 12883; SSE41-NEXT: pand %xmm1, %xmm2 12884; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12885; SSE41-NEXT: movdqa %xmm3, %xmm4 12886; SSE41-NEXT: pshufb %xmm2, %xmm4 12887; SSE41-NEXT: psrlw $4, %xmm0 12888; SSE41-NEXT: pand %xmm1, %xmm0 12889; SSE41-NEXT: pshufb %xmm0, %xmm3 12890; SSE41-NEXT: paddb %xmm4, %xmm3 12891; SSE41-NEXT: pxor %xmm1, %xmm1 12892; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 12893; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 12894; SSE41-NEXT: psadbw %xmm1, %xmm3 12895; SSE41-NEXT: psadbw %xmm1, %xmm0 12896; SSE41-NEXT: packuswb %xmm3, %xmm0 
12897; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 12898; SSE41-NEXT: retq 12899; 12900; AVX1-LABEL: ugt_20_v4i32: 12901; AVX1: # %bb.0: 12902; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12903; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 12904; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12905; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12906; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 12907; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 12908; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12909; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12910; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 12911; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12912; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12913; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12914; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12915; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12916; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 12917; AVX1-NEXT: retq 12918; 12919; AVX2-LABEL: ugt_20_v4i32: 12920; AVX2: # %bb.0: 12921; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 12922; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 12923; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 12924; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 12925; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 12926; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 12927; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 12928; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 12929; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 12930; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12931; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12932; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12933; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12934; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12935; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] 12936; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12937; AVX2-NEXT: retq 12938; 12939; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i32: 12940; 
AVX512VPOPCNTDQ: # %bb.0: 12941; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12942; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 12943; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] 12944; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12945; AVX512VPOPCNTDQ-NEXT: vzeroupper 12946; AVX512VPOPCNTDQ-NEXT: retq 12947; 12948; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i32: 12949; AVX512VPOPCNTDQVL: # %bb.0: 12950; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 12951; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 12952; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12953; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12954; AVX512VPOPCNTDQVL-NEXT: retq 12955; 12956; BITALG_NOVLX-LABEL: ugt_20_v4i32: 12957; BITALG_NOVLX: # %bb.0: 12958; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12959; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 12960; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 12961; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12962; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12963; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12964; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12965; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12966; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] 12967; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 12968; BITALG_NOVLX-NEXT: vzeroupper 12969; BITALG_NOVLX-NEXT: retq 12970; 12971; BITALG-LABEL: ugt_20_v4i32: 12972; BITALG: # %bb.0: 12973; BITALG-NEXT: vpopcntb %xmm0, %xmm0 12974; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 12975; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 12976; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 12977; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 12978; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 12979; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 12980; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, 
%k1 12981; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 12982; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 12983; BITALG-NEXT: retq 12984 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 12985 %3 = icmp ugt <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20> 12986 %4 = sext <4 x i1> %3 to <4 x i32> 12987 ret <4 x i32> %4 12988} 12989 12990define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { 12991; SSE2-LABEL: ult_21_v4i32: 12992; SSE2: # %bb.0: 12993; SSE2-NEXT: movdqa %xmm0, %xmm1 12994; SSE2-NEXT: psrlw $1, %xmm1 12995; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 12996; SSE2-NEXT: psubb %xmm1, %xmm0 12997; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 12998; SSE2-NEXT: movdqa %xmm0, %xmm2 12999; SSE2-NEXT: pand %xmm1, %xmm2 13000; SSE2-NEXT: psrlw $2, %xmm0 13001; SSE2-NEXT: pand %xmm1, %xmm0 13002; SSE2-NEXT: paddb %xmm2, %xmm0 13003; SSE2-NEXT: movdqa %xmm0, %xmm1 13004; SSE2-NEXT: psrlw $4, %xmm1 13005; SSE2-NEXT: paddb %xmm0, %xmm1 13006; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13007; SSE2-NEXT: pxor %xmm0, %xmm0 13008; SSE2-NEXT: movdqa %xmm1, %xmm2 13009; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13010; SSE2-NEXT: psadbw %xmm0, %xmm2 13011; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13012; SSE2-NEXT: psadbw %xmm0, %xmm1 13013; SSE2-NEXT: packuswb %xmm2, %xmm1 13014; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] 13015; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 13016; SSE2-NEXT: retq 13017; 13018; SSE3-LABEL: ult_21_v4i32: 13019; SSE3: # %bb.0: 13020; SSE3-NEXT: movdqa %xmm0, %xmm1 13021; SSE3-NEXT: psrlw $1, %xmm1 13022; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13023; SSE3-NEXT: psubb %xmm1, %xmm0 13024; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13025; SSE3-NEXT: movdqa %xmm0, %xmm2 13026; SSE3-NEXT: pand %xmm1, %xmm2 13027; SSE3-NEXT: psrlw $2, %xmm0 13028; SSE3-NEXT: pand %xmm1, %xmm0 13029; SSE3-NEXT: paddb %xmm2, %xmm0 13030; SSE3-NEXT: movdqa %xmm0, 
%xmm1 13031; SSE3-NEXT: psrlw $4, %xmm1 13032; SSE3-NEXT: paddb %xmm0, %xmm1 13033; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13034; SSE3-NEXT: pxor %xmm0, %xmm0 13035; SSE3-NEXT: movdqa %xmm1, %xmm2 13036; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13037; SSE3-NEXT: psadbw %xmm0, %xmm2 13038; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13039; SSE3-NEXT: psadbw %xmm0, %xmm1 13040; SSE3-NEXT: packuswb %xmm2, %xmm1 13041; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] 13042; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 13043; SSE3-NEXT: retq 13044; 13045; SSSE3-LABEL: ult_21_v4i32: 13046; SSSE3: # %bb.0: 13047; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13048; SSSE3-NEXT: movdqa %xmm0, %xmm2 13049; SSSE3-NEXT: pand %xmm1, %xmm2 13050; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13051; SSSE3-NEXT: movdqa %xmm3, %xmm4 13052; SSSE3-NEXT: pshufb %xmm2, %xmm4 13053; SSSE3-NEXT: psrlw $4, %xmm0 13054; SSSE3-NEXT: pand %xmm1, %xmm0 13055; SSSE3-NEXT: pshufb %xmm0, %xmm3 13056; SSSE3-NEXT: paddb %xmm4, %xmm3 13057; SSSE3-NEXT: pxor %xmm0, %xmm0 13058; SSSE3-NEXT: movdqa %xmm3, %xmm1 13059; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 13060; SSSE3-NEXT: psadbw %xmm0, %xmm1 13061; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 13062; SSSE3-NEXT: psadbw %xmm0, %xmm3 13063; SSSE3-NEXT: packuswb %xmm1, %xmm3 13064; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] 13065; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 13066; SSSE3-NEXT: retq 13067; 13068; SSE41-LABEL: ult_21_v4i32: 13069; SSE41: # %bb.0: 13070; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13071; SSE41-NEXT: movdqa %xmm0, %xmm2 13072; SSE41-NEXT: pand %xmm1, %xmm2 13073; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13074; SSE41-NEXT: movdqa %xmm3, %xmm4 13075; SSE41-NEXT: pshufb %xmm2, %xmm4 13076; SSE41-NEXT: psrlw $4, 
%xmm0 13077; SSE41-NEXT: pand %xmm1, %xmm0 13078; SSE41-NEXT: pshufb %xmm0, %xmm3 13079; SSE41-NEXT: paddb %xmm4, %xmm3 13080; SSE41-NEXT: pxor %xmm0, %xmm0 13081; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 13082; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 13083; SSE41-NEXT: psadbw %xmm0, %xmm3 13084; SSE41-NEXT: psadbw %xmm0, %xmm1 13085; SSE41-NEXT: packuswb %xmm3, %xmm1 13086; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] 13087; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 13088; SSE41-NEXT: retq 13089; 13090; AVX1-LABEL: ult_21_v4i32: 13091; AVX1: # %bb.0: 13092; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13093; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 13094; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13095; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13096; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 13097; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 13098; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13099; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13100; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 13101; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13102; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13103; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13104; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13105; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13106; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21] 13107; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13108; AVX1-NEXT: retq 13109; 13110; AVX2-LABEL: ult_21_v4i32: 13111; AVX2: # %bb.0: 13112; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13113; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 13114; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13115; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13116; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 13117; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 13118; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13119; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 
13120; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 13121; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13122; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13123; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13124; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13125; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13126; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] 13127; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13128; AVX2-NEXT: retq 13129; 13130; AVX512VPOPCNTDQ-LABEL: ult_21_v4i32: 13131; AVX512VPOPCNTDQ: # %bb.0: 13132; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13133; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 13134; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] 13135; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13136; AVX512VPOPCNTDQ-NEXT: vzeroupper 13137; AVX512VPOPCNTDQ-NEXT: retq 13138; 13139; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i32: 13140; AVX512VPOPCNTDQVL: # %bb.0: 13141; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 13142; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 13143; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13144; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13145; AVX512VPOPCNTDQVL-NEXT: retq 13146; 13147; BITALG_NOVLX-LABEL: ult_21_v4i32: 13148; BITALG_NOVLX: # %bb.0: 13149; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13150; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 13151; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 13152; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13153; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13154; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13155; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13156; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13157; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] 13158; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13159; BITALG_NOVLX-NEXT: vzeroupper 13160; 
BITALG_NOVLX-NEXT: retq 13161; 13162; BITALG-LABEL: ult_21_v4i32: 13163; BITALG: # %bb.0: 13164; BITALG-NEXT: vpopcntb %xmm0, %xmm0 13165; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 13166; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13167; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13168; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13169; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13170; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13171; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 13172; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13173; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13174; BITALG-NEXT: retq 13175 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 13176 %3 = icmp ult <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21> 13177 %4 = sext <4 x i1> %3 to <4 x i32> 13178 ret <4 x i32> %4 13179} 13180 13181define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { 13182; SSE2-LABEL: ugt_21_v4i32: 13183; SSE2: # %bb.0: 13184; SSE2-NEXT: movdqa %xmm0, %xmm1 13185; SSE2-NEXT: psrlw $1, %xmm1 13186; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13187; SSE2-NEXT: psubb %xmm1, %xmm0 13188; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13189; SSE2-NEXT: movdqa %xmm0, %xmm2 13190; SSE2-NEXT: pand %xmm1, %xmm2 13191; SSE2-NEXT: psrlw $2, %xmm0 13192; SSE2-NEXT: pand %xmm1, %xmm0 13193; SSE2-NEXT: paddb %xmm2, %xmm0 13194; SSE2-NEXT: movdqa %xmm0, %xmm1 13195; SSE2-NEXT: psrlw $4, %xmm1 13196; SSE2-NEXT: paddb %xmm0, %xmm1 13197; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13198; SSE2-NEXT: pxor %xmm0, %xmm0 13199; SSE2-NEXT: movdqa %xmm1, %xmm2 13200; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13201; SSE2-NEXT: psadbw %xmm0, %xmm2 13202; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13203; SSE2-NEXT: psadbw %xmm0, %xmm1 13204; SSE2-NEXT: packuswb %xmm2, %xmm1 13205; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 13206; SSE2-NEXT: movdqa %xmm1, %xmm0 13207; SSE2-NEXT: 
retq 13208; 13209; SSE3-LABEL: ugt_21_v4i32: 13210; SSE3: # %bb.0: 13211; SSE3-NEXT: movdqa %xmm0, %xmm1 13212; SSE3-NEXT: psrlw $1, %xmm1 13213; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13214; SSE3-NEXT: psubb %xmm1, %xmm0 13215; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13216; SSE3-NEXT: movdqa %xmm0, %xmm2 13217; SSE3-NEXT: pand %xmm1, %xmm2 13218; SSE3-NEXT: psrlw $2, %xmm0 13219; SSE3-NEXT: pand %xmm1, %xmm0 13220; SSE3-NEXT: paddb %xmm2, %xmm0 13221; SSE3-NEXT: movdqa %xmm0, %xmm1 13222; SSE3-NEXT: psrlw $4, %xmm1 13223; SSE3-NEXT: paddb %xmm0, %xmm1 13224; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13225; SSE3-NEXT: pxor %xmm0, %xmm0 13226; SSE3-NEXT: movdqa %xmm1, %xmm2 13227; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13228; SSE3-NEXT: psadbw %xmm0, %xmm2 13229; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13230; SSE3-NEXT: psadbw %xmm0, %xmm1 13231; SSE3-NEXT: packuswb %xmm2, %xmm1 13232; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 13233; SSE3-NEXT: movdqa %xmm1, %xmm0 13234; SSE3-NEXT: retq 13235; 13236; SSSE3-LABEL: ugt_21_v4i32: 13237; SSSE3: # %bb.0: 13238; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13239; SSSE3-NEXT: movdqa %xmm0, %xmm3 13240; SSSE3-NEXT: pand %xmm2, %xmm3 13241; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13242; SSSE3-NEXT: movdqa %xmm1, %xmm4 13243; SSSE3-NEXT: pshufb %xmm3, %xmm4 13244; SSSE3-NEXT: psrlw $4, %xmm0 13245; SSSE3-NEXT: pand %xmm2, %xmm0 13246; SSSE3-NEXT: pshufb %xmm0, %xmm1 13247; SSSE3-NEXT: paddb %xmm4, %xmm1 13248; SSSE3-NEXT: pxor %xmm0, %xmm0 13249; SSSE3-NEXT: movdqa %xmm1, %xmm2 13250; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13251; SSSE3-NEXT: psadbw %xmm0, %xmm2 13252; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13253; SSSE3-NEXT: psadbw %xmm0, %xmm1 13254; SSSE3-NEXT: packuswb %xmm2, %xmm1 13255; SSSE3-NEXT: 
pcmpgtd {{.*}}(%rip), %xmm1 13256; SSSE3-NEXT: movdqa %xmm1, %xmm0 13257; SSSE3-NEXT: retq 13258; 13259; SSE41-LABEL: ugt_21_v4i32: 13260; SSE41: # %bb.0: 13261; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13262; SSE41-NEXT: movdqa %xmm0, %xmm2 13263; SSE41-NEXT: pand %xmm1, %xmm2 13264; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13265; SSE41-NEXT: movdqa %xmm3, %xmm4 13266; SSE41-NEXT: pshufb %xmm2, %xmm4 13267; SSE41-NEXT: psrlw $4, %xmm0 13268; SSE41-NEXT: pand %xmm1, %xmm0 13269; SSE41-NEXT: pshufb %xmm0, %xmm3 13270; SSE41-NEXT: paddb %xmm4, %xmm3 13271; SSE41-NEXT: pxor %xmm1, %xmm1 13272; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 13273; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 13274; SSE41-NEXT: psadbw %xmm1, %xmm3 13275; SSE41-NEXT: psadbw %xmm1, %xmm0 13276; SSE41-NEXT: packuswb %xmm3, %xmm0 13277; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 13278; SSE41-NEXT: retq 13279; 13280; AVX1-LABEL: ugt_21_v4i32: 13281; AVX1: # %bb.0: 13282; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13283; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 13284; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13285; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13286; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 13287; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 13288; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13289; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13290; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 13291; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13292; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13293; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13294; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13295; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13296; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 13297; AVX1-NEXT: retq 13298; 13299; AVX2-LABEL: ugt_21_v4i32: 13300; AVX2: # %bb.0: 13301; AVX2-NEXT: vmovdqa 
{{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13302; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 13303; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13304; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13305; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 13306; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 13307; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13308; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13309; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 13310; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13311; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13312; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13313; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13314; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13315; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] 13316; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 13317; AVX2-NEXT: retq 13318; 13319; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i32: 13320; AVX512VPOPCNTDQ: # %bb.0: 13321; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13322; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 13323; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] 13324; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 13325; AVX512VPOPCNTDQ-NEXT: vzeroupper 13326; AVX512VPOPCNTDQ-NEXT: retq 13327; 13328; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i32: 13329; AVX512VPOPCNTDQVL: # %bb.0: 13330; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 13331; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 13332; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13333; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13334; AVX512VPOPCNTDQVL-NEXT: retq 13335; 13336; BITALG_NOVLX-LABEL: ugt_21_v4i32: 13337; BITALG_NOVLX: # %bb.0: 13338; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13339; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 13340; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 13341; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13342; 
BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13343; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13344; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13345; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13346; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] 13347; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 13348; BITALG_NOVLX-NEXT: vzeroupper 13349; BITALG_NOVLX-NEXT: retq 13350; 13351; BITALG-LABEL: ugt_21_v4i32: 13352; BITALG: # %bb.0: 13353; BITALG-NEXT: vpopcntb %xmm0, %xmm0 13354; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 13355; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13356; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13357; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13358; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13359; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13360; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 13361; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13362; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13363; BITALG-NEXT: retq 13364 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 13365 %3 = icmp ugt <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21> 13366 %4 = sext <4 x i1> %3 to <4 x i32> 13367 ret <4 x i32> %4 13368} 13369 13370define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { 13371; SSE2-LABEL: ult_22_v4i32: 13372; SSE2: # %bb.0: 13373; SSE2-NEXT: movdqa %xmm0, %xmm1 13374; SSE2-NEXT: psrlw $1, %xmm1 13375; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13376; SSE2-NEXT: psubb %xmm1, %xmm0 13377; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13378; SSE2-NEXT: movdqa %xmm0, %xmm2 13379; SSE2-NEXT: pand %xmm1, %xmm2 13380; SSE2-NEXT: psrlw $2, %xmm0 13381; SSE2-NEXT: pand %xmm1, %xmm0 13382; SSE2-NEXT: paddb %xmm2, %xmm0 13383; SSE2-NEXT: movdqa %xmm0, %xmm1 13384; SSE2-NEXT: psrlw $4, %xmm1 13385; SSE2-NEXT: paddb %xmm0, %xmm1 13386; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13387; SSE2-NEXT: pxor %xmm0, %xmm0 13388; 
SSE2-NEXT: movdqa %xmm1, %xmm2 13389; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13390; SSE2-NEXT: psadbw %xmm0, %xmm2 13391; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13392; SSE2-NEXT: psadbw %xmm0, %xmm1 13393; SSE2-NEXT: packuswb %xmm2, %xmm1 13394; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] 13395; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 13396; SSE2-NEXT: retq 13397; 13398; SSE3-LABEL: ult_22_v4i32: 13399; SSE3: # %bb.0: 13400; SSE3-NEXT: movdqa %xmm0, %xmm1 13401; SSE3-NEXT: psrlw $1, %xmm1 13402; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13403; SSE3-NEXT: psubb %xmm1, %xmm0 13404; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13405; SSE3-NEXT: movdqa %xmm0, %xmm2 13406; SSE3-NEXT: pand %xmm1, %xmm2 13407; SSE3-NEXT: psrlw $2, %xmm0 13408; SSE3-NEXT: pand %xmm1, %xmm0 13409; SSE3-NEXT: paddb %xmm2, %xmm0 13410; SSE3-NEXT: movdqa %xmm0, %xmm1 13411; SSE3-NEXT: psrlw $4, %xmm1 13412; SSE3-NEXT: paddb %xmm0, %xmm1 13413; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13414; SSE3-NEXT: pxor %xmm0, %xmm0 13415; SSE3-NEXT: movdqa %xmm1, %xmm2 13416; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13417; SSE3-NEXT: psadbw %xmm0, %xmm2 13418; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13419; SSE3-NEXT: psadbw %xmm0, %xmm1 13420; SSE3-NEXT: packuswb %xmm2, %xmm1 13421; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] 13422; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 13423; SSE3-NEXT: retq 13424; 13425; SSSE3-LABEL: ult_22_v4i32: 13426; SSSE3: # %bb.0: 13427; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13428; SSSE3-NEXT: movdqa %xmm0, %xmm2 13429; SSSE3-NEXT: pand %xmm1, %xmm2 13430; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13431; SSSE3-NEXT: movdqa %xmm3, %xmm4 13432; SSSE3-NEXT: pshufb %xmm2, %xmm4 13433; SSSE3-NEXT: psrlw $4, %xmm0 13434; SSSE3-NEXT: pand %xmm1, %xmm0 13435; SSSE3-NEXT: pshufb 
%xmm0, %xmm3 13436; SSSE3-NEXT: paddb %xmm4, %xmm3 13437; SSSE3-NEXT: pxor %xmm0, %xmm0 13438; SSSE3-NEXT: movdqa %xmm3, %xmm1 13439; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 13440; SSSE3-NEXT: psadbw %xmm0, %xmm1 13441; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 13442; SSSE3-NEXT: psadbw %xmm0, %xmm3 13443; SSSE3-NEXT: packuswb %xmm1, %xmm3 13444; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] 13445; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 13446; SSSE3-NEXT: retq 13447; 13448; SSE41-LABEL: ult_22_v4i32: 13449; SSE41: # %bb.0: 13450; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13451; SSE41-NEXT: movdqa %xmm0, %xmm2 13452; SSE41-NEXT: pand %xmm1, %xmm2 13453; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13454; SSE41-NEXT: movdqa %xmm3, %xmm4 13455; SSE41-NEXT: pshufb %xmm2, %xmm4 13456; SSE41-NEXT: psrlw $4, %xmm0 13457; SSE41-NEXT: pand %xmm1, %xmm0 13458; SSE41-NEXT: pshufb %xmm0, %xmm3 13459; SSE41-NEXT: paddb %xmm4, %xmm3 13460; SSE41-NEXT: pxor %xmm0, %xmm0 13461; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 13462; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 13463; SSE41-NEXT: psadbw %xmm0, %xmm3 13464; SSE41-NEXT: psadbw %xmm0, %xmm1 13465; SSE41-NEXT: packuswb %xmm3, %xmm1 13466; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] 13467; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 13468; SSE41-NEXT: retq 13469; 13470; AVX1-LABEL: ult_22_v4i32: 13471; AVX1: # %bb.0: 13472; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13473; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 13474; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13475; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13476; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 13477; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 13478; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13479; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13480; AVX1-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 13481; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13482; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13483; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13484; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13485; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13486; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22] 13487; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13488; AVX1-NEXT: retq 13489; 13490; AVX2-LABEL: ult_22_v4i32: 13491; AVX2: # %bb.0: 13492; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13493; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 13494; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13495; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13496; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 13497; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 13498; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13499; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13500; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 13501; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13502; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13503; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13504; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13505; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13506; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] 13507; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13508; AVX2-NEXT: retq 13509; 13510; AVX512VPOPCNTDQ-LABEL: ult_22_v4i32: 13511; AVX512VPOPCNTDQ: # %bb.0: 13512; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13513; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 13514; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] 13515; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13516; AVX512VPOPCNTDQ-NEXT: vzeroupper 13517; AVX512VPOPCNTDQ-NEXT: retq 13518; 13519; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i32: 13520; AVX512VPOPCNTDQVL: # %bb.0: 13521; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 13522; AVX512VPOPCNTDQVL-NEXT: vpcmpltud 
{{.*}}(%rip){1to4}, %xmm0, %k1 13523; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13524; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13525; AVX512VPOPCNTDQVL-NEXT: retq 13526; 13527; BITALG_NOVLX-LABEL: ult_22_v4i32: 13528; BITALG_NOVLX: # %bb.0: 13529; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13530; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 13531; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 13532; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13533; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13534; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13535; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13536; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13537; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] 13538; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13539; BITALG_NOVLX-NEXT: vzeroupper 13540; BITALG_NOVLX-NEXT: retq 13541; 13542; BITALG-LABEL: ult_22_v4i32: 13543; BITALG: # %bb.0: 13544; BITALG-NEXT: vpopcntb %xmm0, %xmm0 13545; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 13546; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13547; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13548; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13549; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13550; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13551; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 13552; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13553; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13554; BITALG-NEXT: retq 13555 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 13556 %3 = icmp ult <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22> 13557 %4 = sext <4 x i1> %3 to <4 x i32> 13558 ret <4 x i32> %4 13559} 13560 13561define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { 13562; SSE2-LABEL: ugt_22_v4i32: 13563; SSE2: # %bb.0: 13564; SSE2-NEXT: movdqa %xmm0, %xmm1 13565; SSE2-NEXT: psrlw $1, %xmm1 13566; SSE2-NEXT: 
pand {{.*}}(%rip), %xmm1 13567; SSE2-NEXT: psubb %xmm1, %xmm0 13568; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13569; SSE2-NEXT: movdqa %xmm0, %xmm2 13570; SSE2-NEXT: pand %xmm1, %xmm2 13571; SSE2-NEXT: psrlw $2, %xmm0 13572; SSE2-NEXT: pand %xmm1, %xmm0 13573; SSE2-NEXT: paddb %xmm2, %xmm0 13574; SSE2-NEXT: movdqa %xmm0, %xmm1 13575; SSE2-NEXT: psrlw $4, %xmm1 13576; SSE2-NEXT: paddb %xmm0, %xmm1 13577; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13578; SSE2-NEXT: pxor %xmm0, %xmm0 13579; SSE2-NEXT: movdqa %xmm1, %xmm2 13580; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13581; SSE2-NEXT: psadbw %xmm0, %xmm2 13582; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13583; SSE2-NEXT: psadbw %xmm0, %xmm1 13584; SSE2-NEXT: packuswb %xmm2, %xmm1 13585; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 13586; SSE2-NEXT: movdqa %xmm1, %xmm0 13587; SSE2-NEXT: retq 13588; 13589; SSE3-LABEL: ugt_22_v4i32: 13590; SSE3: # %bb.0: 13591; SSE3-NEXT: movdqa %xmm0, %xmm1 13592; SSE3-NEXT: psrlw $1, %xmm1 13593; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13594; SSE3-NEXT: psubb %xmm1, %xmm0 13595; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13596; SSE3-NEXT: movdqa %xmm0, %xmm2 13597; SSE3-NEXT: pand %xmm1, %xmm2 13598; SSE3-NEXT: psrlw $2, %xmm0 13599; SSE3-NEXT: pand %xmm1, %xmm0 13600; SSE3-NEXT: paddb %xmm2, %xmm0 13601; SSE3-NEXT: movdqa %xmm0, %xmm1 13602; SSE3-NEXT: psrlw $4, %xmm1 13603; SSE3-NEXT: paddb %xmm0, %xmm1 13604; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13605; SSE3-NEXT: pxor %xmm0, %xmm0 13606; SSE3-NEXT: movdqa %xmm1, %xmm2 13607; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13608; SSE3-NEXT: psadbw %xmm0, %xmm2 13609; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13610; SSE3-NEXT: psadbw %xmm0, %xmm1 13611; SSE3-NEXT: packuswb %xmm2, %xmm1 13612; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 13613; SSE3-NEXT: movdqa %xmm1, %xmm0 
13614; SSE3-NEXT: retq 13615; 13616; SSSE3-LABEL: ugt_22_v4i32: 13617; SSSE3: # %bb.0: 13618; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13619; SSSE3-NEXT: movdqa %xmm0, %xmm3 13620; SSSE3-NEXT: pand %xmm2, %xmm3 13621; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13622; SSSE3-NEXT: movdqa %xmm1, %xmm4 13623; SSSE3-NEXT: pshufb %xmm3, %xmm4 13624; SSSE3-NEXT: psrlw $4, %xmm0 13625; SSSE3-NEXT: pand %xmm2, %xmm0 13626; SSSE3-NEXT: pshufb %xmm0, %xmm1 13627; SSSE3-NEXT: paddb %xmm4, %xmm1 13628; SSSE3-NEXT: pxor %xmm0, %xmm0 13629; SSSE3-NEXT: movdqa %xmm1, %xmm2 13630; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13631; SSSE3-NEXT: psadbw %xmm0, %xmm2 13632; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13633; SSSE3-NEXT: psadbw %xmm0, %xmm1 13634; SSSE3-NEXT: packuswb %xmm2, %xmm1 13635; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 13636; SSSE3-NEXT: movdqa %xmm1, %xmm0 13637; SSSE3-NEXT: retq 13638; 13639; SSE41-LABEL: ugt_22_v4i32: 13640; SSE41: # %bb.0: 13641; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13642; SSE41-NEXT: movdqa %xmm0, %xmm2 13643; SSE41-NEXT: pand %xmm1, %xmm2 13644; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13645; SSE41-NEXT: movdqa %xmm3, %xmm4 13646; SSE41-NEXT: pshufb %xmm2, %xmm4 13647; SSE41-NEXT: psrlw $4, %xmm0 13648; SSE41-NEXT: pand %xmm1, %xmm0 13649; SSE41-NEXT: pshufb %xmm0, %xmm3 13650; SSE41-NEXT: paddb %xmm4, %xmm3 13651; SSE41-NEXT: pxor %xmm1, %xmm1 13652; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 13653; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 13654; SSE41-NEXT: psadbw %xmm1, %xmm3 13655; SSE41-NEXT: psadbw %xmm1, %xmm0 13656; SSE41-NEXT: packuswb %xmm3, %xmm0 13657; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 13658; SSE41-NEXT: retq 13659; 13660; AVX1-LABEL: ugt_22_v4i32: 13661; AVX1: # %bb.0: 13662; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13663; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 13664; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13665; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13666; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 13667; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 13668; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13669; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13670; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 13671; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13672; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13673; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13674; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13675; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13676; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 13677; AVX1-NEXT: retq 13678; 13679; AVX2-LABEL: ugt_22_v4i32: 13680; AVX2: # %bb.0: 13681; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13682; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 13683; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13684; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13685; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 13686; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 13687; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13688; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13689; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 13690; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13691; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13692; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13693; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13694; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13695; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] 13696; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 13697; AVX2-NEXT: retq 13698; 13699; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i32: 13700; AVX512VPOPCNTDQ: # %bb.0: 13701; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13702; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 
13703; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] 13704; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 13705; AVX512VPOPCNTDQ-NEXT: vzeroupper 13706; AVX512VPOPCNTDQ-NEXT: retq 13707; 13708; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i32: 13709; AVX512VPOPCNTDQVL: # %bb.0: 13710; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 13711; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 13712; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13713; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13714; AVX512VPOPCNTDQVL-NEXT: retq 13715; 13716; BITALG_NOVLX-LABEL: ugt_22_v4i32: 13717; BITALG_NOVLX: # %bb.0: 13718; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13719; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 13720; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 13721; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13722; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13723; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13724; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13725; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13726; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] 13727; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 13728; BITALG_NOVLX-NEXT: vzeroupper 13729; BITALG_NOVLX-NEXT: retq 13730; 13731; BITALG-LABEL: ugt_22_v4i32: 13732; BITALG: # %bb.0: 13733; BITALG-NEXT: vpopcntb %xmm0, %xmm0 13734; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 13735; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13736; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13737; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13738; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13739; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13740; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 13741; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13742; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13743; BITALG-NEXT: retq 13744 %2 = tail 
call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 13745 %3 = icmp ugt <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22> 13746 %4 = sext <4 x i1> %3 to <4 x i32> 13747 ret <4 x i32> %4 13748} 13749 13750define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { 13751; SSE2-LABEL: ult_23_v4i32: 13752; SSE2: # %bb.0: 13753; SSE2-NEXT: movdqa %xmm0, %xmm1 13754; SSE2-NEXT: psrlw $1, %xmm1 13755; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13756; SSE2-NEXT: psubb %xmm1, %xmm0 13757; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13758; SSE2-NEXT: movdqa %xmm0, %xmm2 13759; SSE2-NEXT: pand %xmm1, %xmm2 13760; SSE2-NEXT: psrlw $2, %xmm0 13761; SSE2-NEXT: pand %xmm1, %xmm0 13762; SSE2-NEXT: paddb %xmm2, %xmm0 13763; SSE2-NEXT: movdqa %xmm0, %xmm1 13764; SSE2-NEXT: psrlw $4, %xmm1 13765; SSE2-NEXT: paddb %xmm0, %xmm1 13766; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13767; SSE2-NEXT: pxor %xmm0, %xmm0 13768; SSE2-NEXT: movdqa %xmm1, %xmm2 13769; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13770; SSE2-NEXT: psadbw %xmm0, %xmm2 13771; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13772; SSE2-NEXT: psadbw %xmm0, %xmm1 13773; SSE2-NEXT: packuswb %xmm2, %xmm1 13774; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] 13775; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 13776; SSE2-NEXT: retq 13777; 13778; SSE3-LABEL: ult_23_v4i32: 13779; SSE3: # %bb.0: 13780; SSE3-NEXT: movdqa %xmm0, %xmm1 13781; SSE3-NEXT: psrlw $1, %xmm1 13782; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13783; SSE3-NEXT: psubb %xmm1, %xmm0 13784; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13785; SSE3-NEXT: movdqa %xmm0, %xmm2 13786; SSE3-NEXT: pand %xmm1, %xmm2 13787; SSE3-NEXT: psrlw $2, %xmm0 13788; SSE3-NEXT: pand %xmm1, %xmm0 13789; SSE3-NEXT: paddb %xmm2, %xmm0 13790; SSE3-NEXT: movdqa %xmm0, %xmm1 13791; SSE3-NEXT: psrlw $4, %xmm1 13792; SSE3-NEXT: paddb %xmm0, %xmm1 13793; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13794; SSE3-NEXT: pxor 
%xmm0, %xmm0 13795; SSE3-NEXT: movdqa %xmm1, %xmm2 13796; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13797; SSE3-NEXT: psadbw %xmm0, %xmm2 13798; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13799; SSE3-NEXT: psadbw %xmm0, %xmm1 13800; SSE3-NEXT: packuswb %xmm2, %xmm1 13801; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] 13802; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 13803; SSE3-NEXT: retq 13804; 13805; SSSE3-LABEL: ult_23_v4i32: 13806; SSSE3: # %bb.0: 13807; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13808; SSSE3-NEXT: movdqa %xmm0, %xmm2 13809; SSSE3-NEXT: pand %xmm1, %xmm2 13810; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13811; SSSE3-NEXT: movdqa %xmm3, %xmm4 13812; SSSE3-NEXT: pshufb %xmm2, %xmm4 13813; SSSE3-NEXT: psrlw $4, %xmm0 13814; SSSE3-NEXT: pand %xmm1, %xmm0 13815; SSSE3-NEXT: pshufb %xmm0, %xmm3 13816; SSSE3-NEXT: paddb %xmm4, %xmm3 13817; SSSE3-NEXT: pxor %xmm0, %xmm0 13818; SSSE3-NEXT: movdqa %xmm3, %xmm1 13819; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 13820; SSSE3-NEXT: psadbw %xmm0, %xmm1 13821; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 13822; SSSE3-NEXT: psadbw %xmm0, %xmm3 13823; SSSE3-NEXT: packuswb %xmm1, %xmm3 13824; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] 13825; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 13826; SSSE3-NEXT: retq 13827; 13828; SSE41-LABEL: ult_23_v4i32: 13829; SSE41: # %bb.0: 13830; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13831; SSE41-NEXT: movdqa %xmm0, %xmm2 13832; SSE41-NEXT: pand %xmm1, %xmm2 13833; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13834; SSE41-NEXT: movdqa %xmm3, %xmm4 13835; SSE41-NEXT: pshufb %xmm2, %xmm4 13836; SSE41-NEXT: psrlw $4, %xmm0 13837; SSE41-NEXT: pand %xmm1, %xmm0 13838; SSE41-NEXT: pshufb %xmm0, %xmm3 13839; SSE41-NEXT: paddb %xmm4, %xmm3 13840; SSE41-NEXT: pxor 
%xmm0, %xmm0 13841; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 13842; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 13843; SSE41-NEXT: psadbw %xmm0, %xmm3 13844; SSE41-NEXT: psadbw %xmm0, %xmm1 13845; SSE41-NEXT: packuswb %xmm3, %xmm1 13846; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] 13847; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 13848; SSE41-NEXT: retq 13849; 13850; AVX1-LABEL: ult_23_v4i32: 13851; AVX1: # %bb.0: 13852; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13853; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 13854; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13855; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13856; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 13857; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 13858; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13859; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13860; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 13861; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13862; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13863; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13864; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13865; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13866; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23] 13867; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13868; AVX1-NEXT: retq 13869; 13870; AVX2-LABEL: ult_23_v4i32: 13871; AVX2: # %bb.0: 13872; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13873; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 13874; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 13875; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 13876; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 13877; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 13878; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 13879; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 13880; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 13881; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13882; AVX2-NEXT: vpsadbw 
%xmm1, %xmm2, %xmm2 13883; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13884; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13885; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13886; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] 13887; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13888; AVX2-NEXT: retq 13889; 13890; AVX512VPOPCNTDQ-LABEL: ult_23_v4i32: 13891; AVX512VPOPCNTDQ: # %bb.0: 13892; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13893; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 13894; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] 13895; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13896; AVX512VPOPCNTDQ-NEXT: vzeroupper 13897; AVX512VPOPCNTDQ-NEXT: retq 13898; 13899; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i32: 13900; AVX512VPOPCNTDQVL: # %bb.0: 13901; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 13902; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 13903; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13904; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13905; AVX512VPOPCNTDQVL-NEXT: retq 13906; 13907; BITALG_NOVLX-LABEL: ult_23_v4i32: 13908; BITALG_NOVLX: # %bb.0: 13909; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13910; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 13911; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 13912; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13913; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13914; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13915; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13916; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13917; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] 13918; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 13919; BITALG_NOVLX-NEXT: vzeroupper 13920; BITALG_NOVLX-NEXT: retq 13921; 13922; BITALG-LABEL: ult_23_v4i32: 13923; BITALG: # %bb.0: 13924; BITALG-NEXT: vpopcntb %xmm0, %xmm0 13925; BITALG-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 13926; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 13927; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 13928; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 13929; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 13930; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 13931; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 13932; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 13933; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 13934; BITALG-NEXT: retq 13935 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 13936 %3 = icmp ult <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23> 13937 %4 = sext <4 x i1> %3 to <4 x i32> 13938 ret <4 x i32> %4 13939} 13940 13941define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { 13942; SSE2-LABEL: ugt_23_v4i32: 13943; SSE2: # %bb.0: 13944; SSE2-NEXT: movdqa %xmm0, %xmm1 13945; SSE2-NEXT: psrlw $1, %xmm1 13946; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13947; SSE2-NEXT: psubb %xmm1, %xmm0 13948; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13949; SSE2-NEXT: movdqa %xmm0, %xmm2 13950; SSE2-NEXT: pand %xmm1, %xmm2 13951; SSE2-NEXT: psrlw $2, %xmm0 13952; SSE2-NEXT: pand %xmm1, %xmm0 13953; SSE2-NEXT: paddb %xmm2, %xmm0 13954; SSE2-NEXT: movdqa %xmm0, %xmm1 13955; SSE2-NEXT: psrlw $4, %xmm1 13956; SSE2-NEXT: paddb %xmm0, %xmm1 13957; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 13958; SSE2-NEXT: pxor %xmm0, %xmm0 13959; SSE2-NEXT: movdqa %xmm1, %xmm2 13960; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13961; SSE2-NEXT: psadbw %xmm0, %xmm2 13962; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13963; SSE2-NEXT: psadbw %xmm0, %xmm1 13964; SSE2-NEXT: packuswb %xmm2, %xmm1 13965; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 13966; SSE2-NEXT: movdqa %xmm1, %xmm0 13967; SSE2-NEXT: retq 13968; 13969; SSE3-LABEL: ugt_23_v4i32: 13970; SSE3: # %bb.0: 13971; SSE3-NEXT: movdqa %xmm0, %xmm1 13972; SSE3-NEXT: psrlw $1, %xmm1 13973; SSE3-NEXT: 
pand {{.*}}(%rip), %xmm1 13974; SSE3-NEXT: psubb %xmm1, %xmm0 13975; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 13976; SSE3-NEXT: movdqa %xmm0, %xmm2 13977; SSE3-NEXT: pand %xmm1, %xmm2 13978; SSE3-NEXT: psrlw $2, %xmm0 13979; SSE3-NEXT: pand %xmm1, %xmm0 13980; SSE3-NEXT: paddb %xmm2, %xmm0 13981; SSE3-NEXT: movdqa %xmm0, %xmm1 13982; SSE3-NEXT: psrlw $4, %xmm1 13983; SSE3-NEXT: paddb %xmm0, %xmm1 13984; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 13985; SSE3-NEXT: pxor %xmm0, %xmm0 13986; SSE3-NEXT: movdqa %xmm1, %xmm2 13987; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 13988; SSE3-NEXT: psadbw %xmm0, %xmm2 13989; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 13990; SSE3-NEXT: psadbw %xmm0, %xmm1 13991; SSE3-NEXT: packuswb %xmm2, %xmm1 13992; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 13993; SSE3-NEXT: movdqa %xmm1, %xmm0 13994; SSE3-NEXT: retq 13995; 13996; SSSE3-LABEL: ugt_23_v4i32: 13997; SSSE3: # %bb.0: 13998; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 13999; SSSE3-NEXT: movdqa %xmm0, %xmm3 14000; SSSE3-NEXT: pand %xmm2, %xmm3 14001; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14002; SSSE3-NEXT: movdqa %xmm1, %xmm4 14003; SSSE3-NEXT: pshufb %xmm3, %xmm4 14004; SSSE3-NEXT: psrlw $4, %xmm0 14005; SSSE3-NEXT: pand %xmm2, %xmm0 14006; SSSE3-NEXT: pshufb %xmm0, %xmm1 14007; SSSE3-NEXT: paddb %xmm4, %xmm1 14008; SSSE3-NEXT: pxor %xmm0, %xmm0 14009; SSSE3-NEXT: movdqa %xmm1, %xmm2 14010; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14011; SSSE3-NEXT: psadbw %xmm0, %xmm2 14012; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14013; SSSE3-NEXT: psadbw %xmm0, %xmm1 14014; SSSE3-NEXT: packuswb %xmm2, %xmm1 14015; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 14016; SSSE3-NEXT: movdqa %xmm1, %xmm0 14017; SSSE3-NEXT: retq 14018; 14019; SSE41-LABEL: ugt_23_v4i32: 14020; SSE41: # %bb.0: 14021; 
SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14022; SSE41-NEXT: movdqa %xmm0, %xmm2 14023; SSE41-NEXT: pand %xmm1, %xmm2 14024; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14025; SSE41-NEXT: movdqa %xmm3, %xmm4 14026; SSE41-NEXT: pshufb %xmm2, %xmm4 14027; SSE41-NEXT: psrlw $4, %xmm0 14028; SSE41-NEXT: pand %xmm1, %xmm0 14029; SSE41-NEXT: pshufb %xmm0, %xmm3 14030; SSE41-NEXT: paddb %xmm4, %xmm3 14031; SSE41-NEXT: pxor %xmm1, %xmm1 14032; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 14033; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 14034; SSE41-NEXT: psadbw %xmm1, %xmm3 14035; SSE41-NEXT: psadbw %xmm1, %xmm0 14036; SSE41-NEXT: packuswb %xmm3, %xmm0 14037; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 14038; SSE41-NEXT: retq 14039; 14040; AVX1-LABEL: ugt_23_v4i32: 14041; AVX1: # %bb.0: 14042; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14043; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 14044; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14045; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14046; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 14047; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 14048; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14049; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14050; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 14051; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14052; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14053; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14054; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14055; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14056; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 14057; AVX1-NEXT: retq 14058; 14059; AVX2-LABEL: ugt_23_v4i32: 14060; AVX2: # %bb.0: 14061; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14062; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 14063; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14064; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14065; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 14066; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 14067; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14068; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14069; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 14070; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14071; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14072; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14073; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14074; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14075; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] 14076; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14077; AVX2-NEXT: retq 14078; 14079; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i32: 14080; AVX512VPOPCNTDQ: # %bb.0: 14081; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14082; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 14083; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] 14084; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14085; AVX512VPOPCNTDQ-NEXT: vzeroupper 14086; AVX512VPOPCNTDQ-NEXT: retq 14087; 14088; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i32: 14089; AVX512VPOPCNTDQVL: # %bb.0: 14090; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 14091; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 14092; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14093; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14094; AVX512VPOPCNTDQVL-NEXT: retq 14095; 14096; BITALG_NOVLX-LABEL: ugt_23_v4i32: 14097; BITALG_NOVLX: # %bb.0: 14098; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14099; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 14100; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 14101; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14102; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14103; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14104; BITALG_NOVLX-NEXT: vpsadbw 
%xmm1, %xmm0, %xmm0 14105; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14106; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] 14107; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14108; BITALG_NOVLX-NEXT: vzeroupper 14109; BITALG_NOVLX-NEXT: retq 14110; 14111; BITALG-LABEL: ugt_23_v4i32: 14112; BITALG: # %bb.0: 14113; BITALG-NEXT: vpopcntb %xmm0, %xmm0 14114; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 14115; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14116; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14117; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14118; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14119; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14120; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 14121; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14122; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14123; BITALG-NEXT: retq 14124 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 14125 %3 = icmp ugt <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23> 14126 %4 = sext <4 x i1> %3 to <4 x i32> 14127 ret <4 x i32> %4 14128} 14129 14130define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { 14131; SSE2-LABEL: ult_24_v4i32: 14132; SSE2: # %bb.0: 14133; SSE2-NEXT: movdqa %xmm0, %xmm1 14134; SSE2-NEXT: psrlw $1, %xmm1 14135; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14136; SSE2-NEXT: psubb %xmm1, %xmm0 14137; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14138; SSE2-NEXT: movdqa %xmm0, %xmm2 14139; SSE2-NEXT: pand %xmm1, %xmm2 14140; SSE2-NEXT: psrlw $2, %xmm0 14141; SSE2-NEXT: pand %xmm1, %xmm0 14142; SSE2-NEXT: paddb %xmm2, %xmm0 14143; SSE2-NEXT: movdqa %xmm0, %xmm1 14144; SSE2-NEXT: psrlw $4, %xmm1 14145; SSE2-NEXT: paddb %xmm0, %xmm1 14146; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14147; SSE2-NEXT: pxor %xmm0, %xmm0 14148; SSE2-NEXT: movdqa %xmm1, %xmm2 14149; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14150; SSE2-NEXT: psadbw %xmm0, %xmm2 14151; 
SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14152; SSE2-NEXT: psadbw %xmm0, %xmm1 14153; SSE2-NEXT: packuswb %xmm2, %xmm1 14154; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] 14155; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 14156; SSE2-NEXT: retq 14157; 14158; SSE3-LABEL: ult_24_v4i32: 14159; SSE3: # %bb.0: 14160; SSE3-NEXT: movdqa %xmm0, %xmm1 14161; SSE3-NEXT: psrlw $1, %xmm1 14162; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14163; SSE3-NEXT: psubb %xmm1, %xmm0 14164; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14165; SSE3-NEXT: movdqa %xmm0, %xmm2 14166; SSE3-NEXT: pand %xmm1, %xmm2 14167; SSE3-NEXT: psrlw $2, %xmm0 14168; SSE3-NEXT: pand %xmm1, %xmm0 14169; SSE3-NEXT: paddb %xmm2, %xmm0 14170; SSE3-NEXT: movdqa %xmm0, %xmm1 14171; SSE3-NEXT: psrlw $4, %xmm1 14172; SSE3-NEXT: paddb %xmm0, %xmm1 14173; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14174; SSE3-NEXT: pxor %xmm0, %xmm0 14175; SSE3-NEXT: movdqa %xmm1, %xmm2 14176; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14177; SSE3-NEXT: psadbw %xmm0, %xmm2 14178; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14179; SSE3-NEXT: psadbw %xmm0, %xmm1 14180; SSE3-NEXT: packuswb %xmm2, %xmm1 14181; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] 14182; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 14183; SSE3-NEXT: retq 14184; 14185; SSSE3-LABEL: ult_24_v4i32: 14186; SSSE3: # %bb.0: 14187; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14188; SSSE3-NEXT: movdqa %xmm0, %xmm2 14189; SSSE3-NEXT: pand %xmm1, %xmm2 14190; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14191; SSSE3-NEXT: movdqa %xmm3, %xmm4 14192; SSSE3-NEXT: pshufb %xmm2, %xmm4 14193; SSSE3-NEXT: psrlw $4, %xmm0 14194; SSSE3-NEXT: pand %xmm1, %xmm0 14195; SSSE3-NEXT: pshufb %xmm0, %xmm3 14196; SSSE3-NEXT: paddb %xmm4, %xmm3 14197; SSSE3-NEXT: pxor %xmm0, %xmm0 14198; SSSE3-NEXT: movdqa %xmm3, %xmm1 14199; SSSE3-NEXT: 
punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 14200; SSSE3-NEXT: psadbw %xmm0, %xmm1 14201; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 14202; SSSE3-NEXT: psadbw %xmm0, %xmm3 14203; SSSE3-NEXT: packuswb %xmm1, %xmm3 14204; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] 14205; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 14206; SSSE3-NEXT: retq 14207; 14208; SSE41-LABEL: ult_24_v4i32: 14209; SSE41: # %bb.0: 14210; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14211; SSE41-NEXT: movdqa %xmm0, %xmm2 14212; SSE41-NEXT: pand %xmm1, %xmm2 14213; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14214; SSE41-NEXT: movdqa %xmm3, %xmm4 14215; SSE41-NEXT: pshufb %xmm2, %xmm4 14216; SSE41-NEXT: psrlw $4, %xmm0 14217; SSE41-NEXT: pand %xmm1, %xmm0 14218; SSE41-NEXT: pshufb %xmm0, %xmm3 14219; SSE41-NEXT: paddb %xmm4, %xmm3 14220; SSE41-NEXT: pxor %xmm0, %xmm0 14221; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 14222; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 14223; SSE41-NEXT: psadbw %xmm0, %xmm3 14224; SSE41-NEXT: psadbw %xmm0, %xmm1 14225; SSE41-NEXT: packuswb %xmm3, %xmm1 14226; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] 14227; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 14228; SSE41-NEXT: retq 14229; 14230; AVX1-LABEL: ult_24_v4i32: 14231; AVX1: # %bb.0: 14232; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14233; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 14234; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14235; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14236; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 14237; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 14238; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14239; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14240; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 14241; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14242; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 
14243; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14244; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14245; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14246; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24] 14247; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14248; AVX1-NEXT: retq 14249; 14250; AVX2-LABEL: ult_24_v4i32: 14251; AVX2: # %bb.0: 14252; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14253; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 14254; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14255; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14256; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 14257; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 14258; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14259; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14260; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 14261; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14262; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14263; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14264; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14265; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14266; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] 14267; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14268; AVX2-NEXT: retq 14269; 14270; AVX512VPOPCNTDQ-LABEL: ult_24_v4i32: 14271; AVX512VPOPCNTDQ: # %bb.0: 14272; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14273; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 14274; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] 14275; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14276; AVX512VPOPCNTDQ-NEXT: vzeroupper 14277; AVX512VPOPCNTDQ-NEXT: retq 14278; 14279; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i32: 14280; AVX512VPOPCNTDQVL: # %bb.0: 14281; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 14282; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 14283; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14284; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 
14285; AVX512VPOPCNTDQVL-NEXT: retq 14286; 14287; BITALG_NOVLX-LABEL: ult_24_v4i32: 14288; BITALG_NOVLX: # %bb.0: 14289; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14290; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 14291; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 14292; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14293; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14294; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14295; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14296; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14297; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] 14298; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14299; BITALG_NOVLX-NEXT: vzeroupper 14300; BITALG_NOVLX-NEXT: retq 14301; 14302; BITALG-LABEL: ult_24_v4i32: 14303; BITALG: # %bb.0: 14304; BITALG-NEXT: vpopcntb %xmm0, %xmm0 14305; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 14306; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14307; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14308; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14309; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14310; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14311; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 14312; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14313; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14314; BITALG-NEXT: retq 14315 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 14316 %3 = icmp ult <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24> 14317 %4 = sext <4 x i1> %3 to <4 x i32> 14318 ret <4 x i32> %4 14319} 14320 14321define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { 14322; SSE2-LABEL: ugt_24_v4i32: 14323; SSE2: # %bb.0: 14324; SSE2-NEXT: movdqa %xmm0, %xmm1 14325; SSE2-NEXT: psrlw $1, %xmm1 14326; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14327; SSE2-NEXT: psubb %xmm1, %xmm0 14328; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 
14329; SSE2-NEXT: movdqa %xmm0, %xmm2 14330; SSE2-NEXT: pand %xmm1, %xmm2 14331; SSE2-NEXT: psrlw $2, %xmm0 14332; SSE2-NEXT: pand %xmm1, %xmm0 14333; SSE2-NEXT: paddb %xmm2, %xmm0 14334; SSE2-NEXT: movdqa %xmm0, %xmm1 14335; SSE2-NEXT: psrlw $4, %xmm1 14336; SSE2-NEXT: paddb %xmm0, %xmm1 14337; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14338; SSE2-NEXT: pxor %xmm0, %xmm0 14339; SSE2-NEXT: movdqa %xmm1, %xmm2 14340; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14341; SSE2-NEXT: psadbw %xmm0, %xmm2 14342; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14343; SSE2-NEXT: psadbw %xmm0, %xmm1 14344; SSE2-NEXT: packuswb %xmm2, %xmm1 14345; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 14346; SSE2-NEXT: movdqa %xmm1, %xmm0 14347; SSE2-NEXT: retq 14348; 14349; SSE3-LABEL: ugt_24_v4i32: 14350; SSE3: # %bb.0: 14351; SSE3-NEXT: movdqa %xmm0, %xmm1 14352; SSE3-NEXT: psrlw $1, %xmm1 14353; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14354; SSE3-NEXT: psubb %xmm1, %xmm0 14355; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14356; SSE3-NEXT: movdqa %xmm0, %xmm2 14357; SSE3-NEXT: pand %xmm1, %xmm2 14358; SSE3-NEXT: psrlw $2, %xmm0 14359; SSE3-NEXT: pand %xmm1, %xmm0 14360; SSE3-NEXT: paddb %xmm2, %xmm0 14361; SSE3-NEXT: movdqa %xmm0, %xmm1 14362; SSE3-NEXT: psrlw $4, %xmm1 14363; SSE3-NEXT: paddb %xmm0, %xmm1 14364; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14365; SSE3-NEXT: pxor %xmm0, %xmm0 14366; SSE3-NEXT: movdqa %xmm1, %xmm2 14367; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14368; SSE3-NEXT: psadbw %xmm0, %xmm2 14369; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14370; SSE3-NEXT: psadbw %xmm0, %xmm1 14371; SSE3-NEXT: packuswb %xmm2, %xmm1 14372; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 14373; SSE3-NEXT: movdqa %xmm1, %xmm0 14374; SSE3-NEXT: retq 14375; 14376; SSSE3-LABEL: ugt_24_v4i32: 14377; SSSE3: # %bb.0: 14378; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14379; SSSE3-NEXT: movdqa %xmm0, %xmm3 14380; SSSE3-NEXT: pand %xmm2, %xmm3 14381; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14382; SSSE3-NEXT: movdqa %xmm1, %xmm4 14383; SSSE3-NEXT: pshufb %xmm3, %xmm4 14384; SSSE3-NEXT: psrlw $4, %xmm0 14385; SSSE3-NEXT: pand %xmm2, %xmm0 14386; SSSE3-NEXT: pshufb %xmm0, %xmm1 14387; SSSE3-NEXT: paddb %xmm4, %xmm1 14388; SSSE3-NEXT: pxor %xmm0, %xmm0 14389; SSSE3-NEXT: movdqa %xmm1, %xmm2 14390; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14391; SSSE3-NEXT: psadbw %xmm0, %xmm2 14392; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14393; SSSE3-NEXT: psadbw %xmm0, %xmm1 14394; SSSE3-NEXT: packuswb %xmm2, %xmm1 14395; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 14396; SSSE3-NEXT: movdqa %xmm1, %xmm0 14397; SSSE3-NEXT: retq 14398; 14399; SSE41-LABEL: ugt_24_v4i32: 14400; SSE41: # %bb.0: 14401; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14402; SSE41-NEXT: movdqa %xmm0, %xmm2 14403; SSE41-NEXT: pand %xmm1, %xmm2 14404; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14405; SSE41-NEXT: movdqa %xmm3, %xmm4 14406; SSE41-NEXT: pshufb %xmm2, %xmm4 14407; SSE41-NEXT: psrlw $4, %xmm0 14408; SSE41-NEXT: pand %xmm1, %xmm0 14409; SSE41-NEXT: pshufb %xmm0, %xmm3 14410; SSE41-NEXT: paddb %xmm4, %xmm3 14411; SSE41-NEXT: pxor %xmm1, %xmm1 14412; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 14413; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 14414; SSE41-NEXT: psadbw %xmm1, %xmm3 14415; SSE41-NEXT: psadbw %xmm1, %xmm0 14416; SSE41-NEXT: packuswb %xmm3, %xmm0 14417; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 14418; SSE41-NEXT: retq 14419; 14420; AVX1-LABEL: ugt_24_v4i32: 14421; AVX1: # %bb.0: 14422; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14423; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 
14424; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14425; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14426; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 14427; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 14428; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14429; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14430; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 14431; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14432; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14433; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14434; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14435; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14436; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 14437; AVX1-NEXT: retq 14438; 14439; AVX2-LABEL: ugt_24_v4i32: 14440; AVX2: # %bb.0: 14441; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14442; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 14443; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14444; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14445; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 14446; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 14447; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14448; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14449; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 14450; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14451; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14452; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14453; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14454; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14455; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] 14456; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14457; AVX2-NEXT: retq 14458; 14459; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i32: 14460; AVX512VPOPCNTDQ: # %bb.0: 14461; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14462; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 14463; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] 14464; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, 
%xmm0 14465; AVX512VPOPCNTDQ-NEXT: vzeroupper 14466; AVX512VPOPCNTDQ-NEXT: retq 14467; 14468; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i32: 14469; AVX512VPOPCNTDQVL: # %bb.0: 14470; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 14471; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 14472; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14473; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14474; AVX512VPOPCNTDQVL-NEXT: retq 14475; 14476; BITALG_NOVLX-LABEL: ugt_24_v4i32: 14477; BITALG_NOVLX: # %bb.0: 14478; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14479; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 14480; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 14481; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14482; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14483; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14484; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14485; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14486; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] 14487; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14488; BITALG_NOVLX-NEXT: vzeroupper 14489; BITALG_NOVLX-NEXT: retq 14490; 14491; BITALG-LABEL: ugt_24_v4i32: 14492; BITALG: # %bb.0: 14493; BITALG-NEXT: vpopcntb %xmm0, %xmm0 14494; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 14495; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14496; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14497; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14498; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14499; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14500; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 14501; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14502; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14503; BITALG-NEXT: retq 14504 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 14505 %3 = icmp ugt <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24> 14506 %4 = 
sext <4 x i1> %3 to <4 x i32> 14507 ret <4 x i32> %4 14508} 14509 14510define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { 14511; SSE2-LABEL: ult_25_v4i32: 14512; SSE2: # %bb.0: 14513; SSE2-NEXT: movdqa %xmm0, %xmm1 14514; SSE2-NEXT: psrlw $1, %xmm1 14515; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14516; SSE2-NEXT: psubb %xmm1, %xmm0 14517; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14518; SSE2-NEXT: movdqa %xmm0, %xmm2 14519; SSE2-NEXT: pand %xmm1, %xmm2 14520; SSE2-NEXT: psrlw $2, %xmm0 14521; SSE2-NEXT: pand %xmm1, %xmm0 14522; SSE2-NEXT: paddb %xmm2, %xmm0 14523; SSE2-NEXT: movdqa %xmm0, %xmm1 14524; SSE2-NEXT: psrlw $4, %xmm1 14525; SSE2-NEXT: paddb %xmm0, %xmm1 14526; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14527; SSE2-NEXT: pxor %xmm0, %xmm0 14528; SSE2-NEXT: movdqa %xmm1, %xmm2 14529; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14530; SSE2-NEXT: psadbw %xmm0, %xmm2 14531; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14532; SSE2-NEXT: psadbw %xmm0, %xmm1 14533; SSE2-NEXT: packuswb %xmm2, %xmm1 14534; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] 14535; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 14536; SSE2-NEXT: retq 14537; 14538; SSE3-LABEL: ult_25_v4i32: 14539; SSE3: # %bb.0: 14540; SSE3-NEXT: movdqa %xmm0, %xmm1 14541; SSE3-NEXT: psrlw $1, %xmm1 14542; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14543; SSE3-NEXT: psubb %xmm1, %xmm0 14544; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14545; SSE3-NEXT: movdqa %xmm0, %xmm2 14546; SSE3-NEXT: pand %xmm1, %xmm2 14547; SSE3-NEXT: psrlw $2, %xmm0 14548; SSE3-NEXT: pand %xmm1, %xmm0 14549; SSE3-NEXT: paddb %xmm2, %xmm0 14550; SSE3-NEXT: movdqa %xmm0, %xmm1 14551; SSE3-NEXT: psrlw $4, %xmm1 14552; SSE3-NEXT: paddb %xmm0, %xmm1 14553; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14554; SSE3-NEXT: pxor %xmm0, %xmm0 14555; SSE3-NEXT: movdqa %xmm1, %xmm2 14556; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 
14557; SSE3-NEXT: psadbw %xmm0, %xmm2 14558; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14559; SSE3-NEXT: psadbw %xmm0, %xmm1 14560; SSE3-NEXT: packuswb %xmm2, %xmm1 14561; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] 14562; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 14563; SSE3-NEXT: retq 14564; 14565; SSSE3-LABEL: ult_25_v4i32: 14566; SSSE3: # %bb.0: 14567; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14568; SSSE3-NEXT: movdqa %xmm0, %xmm2 14569; SSSE3-NEXT: pand %xmm1, %xmm2 14570; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14571; SSSE3-NEXT: movdqa %xmm3, %xmm4 14572; SSSE3-NEXT: pshufb %xmm2, %xmm4 14573; SSSE3-NEXT: psrlw $4, %xmm0 14574; SSSE3-NEXT: pand %xmm1, %xmm0 14575; SSSE3-NEXT: pshufb %xmm0, %xmm3 14576; SSSE3-NEXT: paddb %xmm4, %xmm3 14577; SSSE3-NEXT: pxor %xmm0, %xmm0 14578; SSSE3-NEXT: movdqa %xmm3, %xmm1 14579; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 14580; SSSE3-NEXT: psadbw %xmm0, %xmm1 14581; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 14582; SSSE3-NEXT: psadbw %xmm0, %xmm3 14583; SSSE3-NEXT: packuswb %xmm1, %xmm3 14584; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] 14585; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 14586; SSSE3-NEXT: retq 14587; 14588; SSE41-LABEL: ult_25_v4i32: 14589; SSE41: # %bb.0: 14590; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14591; SSE41-NEXT: movdqa %xmm0, %xmm2 14592; SSE41-NEXT: pand %xmm1, %xmm2 14593; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14594; SSE41-NEXT: movdqa %xmm3, %xmm4 14595; SSE41-NEXT: pshufb %xmm2, %xmm4 14596; SSE41-NEXT: psrlw $4, %xmm0 14597; SSE41-NEXT: pand %xmm1, %xmm0 14598; SSE41-NEXT: pshufb %xmm0, %xmm3 14599; SSE41-NEXT: paddb %xmm4, %xmm3 14600; SSE41-NEXT: pxor %xmm0, %xmm0 14601; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 14602; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = 
xmm3[2],xmm0[2],xmm3[3],xmm0[3] 14603; SSE41-NEXT: psadbw %xmm0, %xmm3 14604; SSE41-NEXT: psadbw %xmm0, %xmm1 14605; SSE41-NEXT: packuswb %xmm3, %xmm1 14606; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] 14607; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 14608; SSE41-NEXT: retq 14609; 14610; AVX1-LABEL: ult_25_v4i32: 14611; AVX1: # %bb.0: 14612; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14613; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 14614; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14615; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14616; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 14617; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 14618; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14619; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14620; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 14621; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14622; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14623; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14624; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14625; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14626; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25] 14627; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14628; AVX1-NEXT: retq 14629; 14630; AVX2-LABEL: ult_25_v4i32: 14631; AVX2: # %bb.0: 14632; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14633; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 14634; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14635; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14636; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 14637; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 14638; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14639; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14640; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 14641; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14642; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14643; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14644; AVX2-NEXT: vpsadbw %xmm1, %xmm0, 
%xmm0 14645; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14646; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] 14647; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14648; AVX2-NEXT: retq 14649; 14650; AVX512VPOPCNTDQ-LABEL: ult_25_v4i32: 14651; AVX512VPOPCNTDQ: # %bb.0: 14652; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14653; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 14654; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] 14655; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14656; AVX512VPOPCNTDQ-NEXT: vzeroupper 14657; AVX512VPOPCNTDQ-NEXT: retq 14658; 14659; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i32: 14660; AVX512VPOPCNTDQVL: # %bb.0: 14661; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 14662; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 14663; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14664; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14665; AVX512VPOPCNTDQVL-NEXT: retq 14666; 14667; BITALG_NOVLX-LABEL: ult_25_v4i32: 14668; BITALG_NOVLX: # %bb.0: 14669; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14670; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 14671; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 14672; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14673; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14674; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14675; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14676; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14677; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] 14678; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 14679; BITALG_NOVLX-NEXT: vzeroupper 14680; BITALG_NOVLX-NEXT: retq 14681; 14682; BITALG-LABEL: ult_25_v4i32: 14683; BITALG: # %bb.0: 14684; BITALG-NEXT: vpopcntb %xmm0, %xmm0 14685; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 14686; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14687; BITALG-NEXT: vpsadbw %xmm1, 
%xmm2, %xmm2 14688; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14689; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14690; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14691; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 14692; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14693; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14694; BITALG-NEXT: retq 14695 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 14696 %3 = icmp ult <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25> 14697 %4 = sext <4 x i1> %3 to <4 x i32> 14698 ret <4 x i32> %4 14699} 14700 14701define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) { 14702; SSE2-LABEL: ugt_25_v4i32: 14703; SSE2: # %bb.0: 14704; SSE2-NEXT: movdqa %xmm0, %xmm1 14705; SSE2-NEXT: psrlw $1, %xmm1 14706; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14707; SSE2-NEXT: psubb %xmm1, %xmm0 14708; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14709; SSE2-NEXT: movdqa %xmm0, %xmm2 14710; SSE2-NEXT: pand %xmm1, %xmm2 14711; SSE2-NEXT: psrlw $2, %xmm0 14712; SSE2-NEXT: pand %xmm1, %xmm0 14713; SSE2-NEXT: paddb %xmm2, %xmm0 14714; SSE2-NEXT: movdqa %xmm0, %xmm1 14715; SSE2-NEXT: psrlw $4, %xmm1 14716; SSE2-NEXT: paddb %xmm0, %xmm1 14717; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14718; SSE2-NEXT: pxor %xmm0, %xmm0 14719; SSE2-NEXT: movdqa %xmm1, %xmm2 14720; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14721; SSE2-NEXT: psadbw %xmm0, %xmm2 14722; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14723; SSE2-NEXT: psadbw %xmm0, %xmm1 14724; SSE2-NEXT: packuswb %xmm2, %xmm1 14725; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 14726; SSE2-NEXT: movdqa %xmm1, %xmm0 14727; SSE2-NEXT: retq 14728; 14729; SSE3-LABEL: ugt_25_v4i32: 14730; SSE3: # %bb.0: 14731; SSE3-NEXT: movdqa %xmm0, %xmm1 14732; SSE3-NEXT: psrlw $1, %xmm1 14733; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14734; SSE3-NEXT: psubb %xmm1, %xmm0 14735; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14736; SSE3-NEXT: movdqa %xmm0, %xmm2 14737; SSE3-NEXT: pand %xmm1, %xmm2 14738; SSE3-NEXT: psrlw $2, %xmm0 14739; SSE3-NEXT: pand %xmm1, %xmm0 14740; SSE3-NEXT: paddb %xmm2, %xmm0 14741; SSE3-NEXT: movdqa %xmm0, %xmm1 14742; SSE3-NEXT: psrlw $4, %xmm1 14743; SSE3-NEXT: paddb %xmm0, %xmm1 14744; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14745; SSE3-NEXT: pxor %xmm0, %xmm0 14746; SSE3-NEXT: movdqa %xmm1, %xmm2 14747; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14748; SSE3-NEXT: psadbw %xmm0, %xmm2 14749; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14750; SSE3-NEXT: psadbw %xmm0, %xmm1 14751; SSE3-NEXT: packuswb %xmm2, %xmm1 14752; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 14753; SSE3-NEXT: movdqa %xmm1, %xmm0 14754; SSE3-NEXT: retq 14755; 14756; SSSE3-LABEL: ugt_25_v4i32: 14757; SSSE3: # %bb.0: 14758; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14759; SSSE3-NEXT: movdqa %xmm0, %xmm3 14760; SSSE3-NEXT: pand %xmm2, %xmm3 14761; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14762; SSSE3-NEXT: movdqa %xmm1, %xmm4 14763; SSSE3-NEXT: pshufb %xmm3, %xmm4 14764; SSSE3-NEXT: psrlw $4, %xmm0 14765; SSSE3-NEXT: pand %xmm2, %xmm0 14766; SSSE3-NEXT: pshufb %xmm0, %xmm1 14767; SSSE3-NEXT: paddb %xmm4, %xmm1 14768; SSSE3-NEXT: pxor %xmm0, %xmm0 14769; SSSE3-NEXT: movdqa %xmm1, %xmm2 14770; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14771; SSSE3-NEXT: psadbw %xmm0, %xmm2 14772; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14773; SSSE3-NEXT: psadbw %xmm0, %xmm1 14774; SSSE3-NEXT: packuswb %xmm2, %xmm1 14775; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 14776; SSSE3-NEXT: movdqa %xmm1, %xmm0 14777; SSSE3-NEXT: retq 14778; 14779; SSE41-LABEL: ugt_25_v4i32: 14780; SSE41: # %bb.0: 14781; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14782; 
SSE41-NEXT: movdqa %xmm0, %xmm2 14783; SSE41-NEXT: pand %xmm1, %xmm2 14784; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14785; SSE41-NEXT: movdqa %xmm3, %xmm4 14786; SSE41-NEXT: pshufb %xmm2, %xmm4 14787; SSE41-NEXT: psrlw $4, %xmm0 14788; SSE41-NEXT: pand %xmm1, %xmm0 14789; SSE41-NEXT: pshufb %xmm0, %xmm3 14790; SSE41-NEXT: paddb %xmm4, %xmm3 14791; SSE41-NEXT: pxor %xmm1, %xmm1 14792; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 14793; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 14794; SSE41-NEXT: psadbw %xmm1, %xmm3 14795; SSE41-NEXT: psadbw %xmm1, %xmm0 14796; SSE41-NEXT: packuswb %xmm3, %xmm0 14797; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 14798; SSE41-NEXT: retq 14799; 14800; AVX1-LABEL: ugt_25_v4i32: 14801; AVX1: # %bb.0: 14802; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14803; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 14804; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14805; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14806; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 14807; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 14808; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14809; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14810; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 14811; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14812; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14813; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14814; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14815; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14816; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 14817; AVX1-NEXT: retq 14818; 14819; AVX2-LABEL: ugt_25_v4i32: 14820; AVX2: # %bb.0: 14821; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14822; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 14823; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14824; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14825; AVX2-NEXT: vpsrlw $4, 
%xmm0, %xmm0 14826; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 14827; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14828; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 14829; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 14830; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14831; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14832; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14833; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14834; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14835; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] 14836; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14837; AVX2-NEXT: retq 14838; 14839; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i32: 14840; AVX512VPOPCNTDQ: # %bb.0: 14841; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14842; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 14843; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] 14844; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14845; AVX512VPOPCNTDQ-NEXT: vzeroupper 14846; AVX512VPOPCNTDQ-NEXT: retq 14847; 14848; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i32: 14849; AVX512VPOPCNTDQVL: # %bb.0: 14850; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 14851; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 14852; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14853; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14854; AVX512VPOPCNTDQVL-NEXT: retq 14855; 14856; BITALG_NOVLX-LABEL: ugt_25_v4i32: 14857; BITALG_NOVLX: # %bb.0: 14858; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 14859; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 14860; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 14861; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14862; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14863; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14864; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14865; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14866; BITALG_NOVLX-NEXT: 
vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] 14867; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 14868; BITALG_NOVLX-NEXT: vzeroupper 14869; BITALG_NOVLX-NEXT: retq 14870; 14871; BITALG-LABEL: ugt_25_v4i32: 14872; BITALG: # %bb.0: 14873; BITALG-NEXT: vpopcntb %xmm0, %xmm0 14874; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 14875; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 14876; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 14877; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 14878; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 14879; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 14880; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 14881; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 14882; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 14883; BITALG-NEXT: retq 14884 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 14885 %3 = icmp ugt <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25> 14886 %4 = sext <4 x i1> %3 to <4 x i32> 14887 ret <4 x i32> %4 14888} 14889 14890define <4 x i32> @ult_26_v4i32(<4 x i32> %0) { 14891; SSE2-LABEL: ult_26_v4i32: 14892; SSE2: # %bb.0: 14893; SSE2-NEXT: movdqa %xmm0, %xmm1 14894; SSE2-NEXT: psrlw $1, %xmm1 14895; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14896; SSE2-NEXT: psubb %xmm1, %xmm0 14897; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14898; SSE2-NEXT: movdqa %xmm0, %xmm2 14899; SSE2-NEXT: pand %xmm1, %xmm2 14900; SSE2-NEXT: psrlw $2, %xmm0 14901; SSE2-NEXT: pand %xmm1, %xmm0 14902; SSE2-NEXT: paddb %xmm2, %xmm0 14903; SSE2-NEXT: movdqa %xmm0, %xmm1 14904; SSE2-NEXT: psrlw $4, %xmm1 14905; SSE2-NEXT: paddb %xmm0, %xmm1 14906; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 14907; SSE2-NEXT: pxor %xmm0, %xmm0 14908; SSE2-NEXT: movdqa %xmm1, %xmm2 14909; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14910; SSE2-NEXT: psadbw %xmm0, %xmm2 14911; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14912; SSE2-NEXT: psadbw %xmm0, %xmm1 
14913; SSE2-NEXT: packuswb %xmm2, %xmm1 14914; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] 14915; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 14916; SSE2-NEXT: retq 14917; 14918; SSE3-LABEL: ult_26_v4i32: 14919; SSE3: # %bb.0: 14920; SSE3-NEXT: movdqa %xmm0, %xmm1 14921; SSE3-NEXT: psrlw $1, %xmm1 14922; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14923; SSE3-NEXT: psubb %xmm1, %xmm0 14924; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 14925; SSE3-NEXT: movdqa %xmm0, %xmm2 14926; SSE3-NEXT: pand %xmm1, %xmm2 14927; SSE3-NEXT: psrlw $2, %xmm0 14928; SSE3-NEXT: pand %xmm1, %xmm0 14929; SSE3-NEXT: paddb %xmm2, %xmm0 14930; SSE3-NEXT: movdqa %xmm0, %xmm1 14931; SSE3-NEXT: psrlw $4, %xmm1 14932; SSE3-NEXT: paddb %xmm0, %xmm1 14933; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 14934; SSE3-NEXT: pxor %xmm0, %xmm0 14935; SSE3-NEXT: movdqa %xmm1, %xmm2 14936; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 14937; SSE3-NEXT: psadbw %xmm0, %xmm2 14938; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 14939; SSE3-NEXT: psadbw %xmm0, %xmm1 14940; SSE3-NEXT: packuswb %xmm2, %xmm1 14941; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] 14942; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 14943; SSE3-NEXT: retq 14944; 14945; SSSE3-LABEL: ult_26_v4i32: 14946; SSSE3: # %bb.0: 14947; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14948; SSSE3-NEXT: movdqa %xmm0, %xmm2 14949; SSSE3-NEXT: pand %xmm1, %xmm2 14950; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14951; SSSE3-NEXT: movdqa %xmm3, %xmm4 14952; SSSE3-NEXT: pshufb %xmm2, %xmm4 14953; SSSE3-NEXT: psrlw $4, %xmm0 14954; SSSE3-NEXT: pand %xmm1, %xmm0 14955; SSSE3-NEXT: pshufb %xmm0, %xmm3 14956; SSSE3-NEXT: paddb %xmm4, %xmm3 14957; SSSE3-NEXT: pxor %xmm0, %xmm0 14958; SSSE3-NEXT: movdqa %xmm3, %xmm1 14959; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 14960; SSSE3-NEXT: psadbw %xmm0, %xmm1 14961; SSSE3-NEXT: 
punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 14962; SSSE3-NEXT: psadbw %xmm0, %xmm3 14963; SSSE3-NEXT: packuswb %xmm1, %xmm3 14964; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] 14965; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 14966; SSSE3-NEXT: retq 14967; 14968; SSE41-LABEL: ult_26_v4i32: 14969; SSE41: # %bb.0: 14970; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14971; SSE41-NEXT: movdqa %xmm0, %xmm2 14972; SSE41-NEXT: pand %xmm1, %xmm2 14973; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14974; SSE41-NEXT: movdqa %xmm3, %xmm4 14975; SSE41-NEXT: pshufb %xmm2, %xmm4 14976; SSE41-NEXT: psrlw $4, %xmm0 14977; SSE41-NEXT: pand %xmm1, %xmm0 14978; SSE41-NEXT: pshufb %xmm0, %xmm3 14979; SSE41-NEXT: paddb %xmm4, %xmm3 14980; SSE41-NEXT: pxor %xmm0, %xmm0 14981; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 14982; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 14983; SSE41-NEXT: psadbw %xmm0, %xmm3 14984; SSE41-NEXT: psadbw %xmm0, %xmm1 14985; SSE41-NEXT: packuswb %xmm3, %xmm1 14986; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] 14987; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 14988; SSE41-NEXT: retq 14989; 14990; AVX1-LABEL: ult_26_v4i32: 14991; AVX1: # %bb.0: 14992; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 14993; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 14994; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 14995; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 14996; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 14997; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 14998; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 14999; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15000; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 15001; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15002; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15003; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15004; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 
15005; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15006; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26] 15007; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15008; AVX1-NEXT: retq 15009; 15010; AVX2-LABEL: ult_26_v4i32: 15011; AVX2: # %bb.0: 15012; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15013; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 15014; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15015; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15016; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 15017; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 15018; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15019; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15020; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 15021; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15022; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15023; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15024; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15025; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15026; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] 15027; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15028; AVX2-NEXT: retq 15029; 15030; AVX512VPOPCNTDQ-LABEL: ult_26_v4i32: 15031; AVX512VPOPCNTDQ: # %bb.0: 15032; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15033; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 15034; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] 15035; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15036; AVX512VPOPCNTDQ-NEXT: vzeroupper 15037; AVX512VPOPCNTDQ-NEXT: retq 15038; 15039; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i32: 15040; AVX512VPOPCNTDQVL: # %bb.0: 15041; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 15042; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 15043; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15044; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15045; AVX512VPOPCNTDQVL-NEXT: retq 15046; 15047; BITALG_NOVLX-LABEL: ult_26_v4i32: 15048; BITALG_NOVLX: # %bb.0: 15049; 
BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15050; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 15051; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 15052; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15053; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15054; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15055; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15056; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15057; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] 15058; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15059; BITALG_NOVLX-NEXT: vzeroupper 15060; BITALG_NOVLX-NEXT: retq 15061; 15062; BITALG-LABEL: ult_26_v4i32: 15063; BITALG: # %bb.0: 15064; BITALG-NEXT: vpopcntb %xmm0, %xmm0 15065; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 15066; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15067; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15068; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15069; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15070; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15071; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 15072; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15073; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15074; BITALG-NEXT: retq 15075 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 15076 %3 = icmp ult <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26> 15077 %4 = sext <4 x i1> %3 to <4 x i32> 15078 ret <4 x i32> %4 15079} 15080 15081define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) { 15082; SSE2-LABEL: ugt_26_v4i32: 15083; SSE2: # %bb.0: 15084; SSE2-NEXT: movdqa %xmm0, %xmm1 15085; SSE2-NEXT: psrlw $1, %xmm1 15086; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15087; SSE2-NEXT: psubb %xmm1, %xmm0 15088; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15089; SSE2-NEXT: movdqa %xmm0, %xmm2 15090; SSE2-NEXT: pand %xmm1, %xmm2 15091; SSE2-NEXT: psrlw $2, %xmm0 15092; 
SSE2-NEXT: pand %xmm1, %xmm0 15093; SSE2-NEXT: paddb %xmm2, %xmm0 15094; SSE2-NEXT: movdqa %xmm0, %xmm1 15095; SSE2-NEXT: psrlw $4, %xmm1 15096; SSE2-NEXT: paddb %xmm0, %xmm1 15097; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15098; SSE2-NEXT: pxor %xmm0, %xmm0 15099; SSE2-NEXT: movdqa %xmm1, %xmm2 15100; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15101; SSE2-NEXT: psadbw %xmm0, %xmm2 15102; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15103; SSE2-NEXT: psadbw %xmm0, %xmm1 15104; SSE2-NEXT: packuswb %xmm2, %xmm1 15105; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15106; SSE2-NEXT: movdqa %xmm1, %xmm0 15107; SSE2-NEXT: retq 15108; 15109; SSE3-LABEL: ugt_26_v4i32: 15110; SSE3: # %bb.0: 15111; SSE3-NEXT: movdqa %xmm0, %xmm1 15112; SSE3-NEXT: psrlw $1, %xmm1 15113; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15114; SSE3-NEXT: psubb %xmm1, %xmm0 15115; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15116; SSE3-NEXT: movdqa %xmm0, %xmm2 15117; SSE3-NEXT: pand %xmm1, %xmm2 15118; SSE3-NEXT: psrlw $2, %xmm0 15119; SSE3-NEXT: pand %xmm1, %xmm0 15120; SSE3-NEXT: paddb %xmm2, %xmm0 15121; SSE3-NEXT: movdqa %xmm0, %xmm1 15122; SSE3-NEXT: psrlw $4, %xmm1 15123; SSE3-NEXT: paddb %xmm0, %xmm1 15124; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15125; SSE3-NEXT: pxor %xmm0, %xmm0 15126; SSE3-NEXT: movdqa %xmm1, %xmm2 15127; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15128; SSE3-NEXT: psadbw %xmm0, %xmm2 15129; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15130; SSE3-NEXT: psadbw %xmm0, %xmm1 15131; SSE3-NEXT: packuswb %xmm2, %xmm1 15132; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15133; SSE3-NEXT: movdqa %xmm1, %xmm0 15134; SSE3-NEXT: retq 15135; 15136; SSSE3-LABEL: ugt_26_v4i32: 15137; SSSE3: # %bb.0: 15138; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15139; SSSE3-NEXT: movdqa %xmm0, %xmm3 15140; SSSE3-NEXT: pand %xmm2, %xmm3 15141; 
SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15142; SSSE3-NEXT: movdqa %xmm1, %xmm4 15143; SSSE3-NEXT: pshufb %xmm3, %xmm4 15144; SSSE3-NEXT: psrlw $4, %xmm0 15145; SSSE3-NEXT: pand %xmm2, %xmm0 15146; SSSE3-NEXT: pshufb %xmm0, %xmm1 15147; SSSE3-NEXT: paddb %xmm4, %xmm1 15148; SSSE3-NEXT: pxor %xmm0, %xmm0 15149; SSSE3-NEXT: movdqa %xmm1, %xmm2 15150; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15151; SSSE3-NEXT: psadbw %xmm0, %xmm2 15152; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15153; SSSE3-NEXT: psadbw %xmm0, %xmm1 15154; SSSE3-NEXT: packuswb %xmm2, %xmm1 15155; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15156; SSSE3-NEXT: movdqa %xmm1, %xmm0 15157; SSSE3-NEXT: retq 15158; 15159; SSE41-LABEL: ugt_26_v4i32: 15160; SSE41: # %bb.0: 15161; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15162; SSE41-NEXT: movdqa %xmm0, %xmm2 15163; SSE41-NEXT: pand %xmm1, %xmm2 15164; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15165; SSE41-NEXT: movdqa %xmm3, %xmm4 15166; SSE41-NEXT: pshufb %xmm2, %xmm4 15167; SSE41-NEXT: psrlw $4, %xmm0 15168; SSE41-NEXT: pand %xmm1, %xmm0 15169; SSE41-NEXT: pshufb %xmm0, %xmm3 15170; SSE41-NEXT: paddb %xmm4, %xmm3 15171; SSE41-NEXT: pxor %xmm1, %xmm1 15172; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 15173; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 15174; SSE41-NEXT: psadbw %xmm1, %xmm3 15175; SSE41-NEXT: psadbw %xmm1, %xmm0 15176; SSE41-NEXT: packuswb %xmm3, %xmm0 15177; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 15178; SSE41-NEXT: retq 15179; 15180; AVX1-LABEL: ugt_26_v4i32: 15181; AVX1: # %bb.0: 15182; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15183; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 15184; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15185; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15186; 
AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 15187; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 15188; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15189; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15190; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 15191; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15192; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15193; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15194; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15195; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15196; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 15197; AVX1-NEXT: retq 15198; 15199; AVX2-LABEL: ugt_26_v4i32: 15200; AVX2: # %bb.0: 15201; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15202; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 15203; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15204; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15205; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 15206; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 15207; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15208; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15209; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 15210; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15211; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15212; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15213; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15214; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15215; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] 15216; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15217; AVX2-NEXT: retq 15218; 15219; AVX512VPOPCNTDQ-LABEL: ugt_26_v4i32: 15220; AVX512VPOPCNTDQ: # %bb.0: 15221; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15222; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 15223; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] 15224; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15225; AVX512VPOPCNTDQ-NEXT: vzeroupper 15226; AVX512VPOPCNTDQ-NEXT: retq 15227; 15228; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i32: 
15229; AVX512VPOPCNTDQVL: # %bb.0: 15230; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 15231; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 15232; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15233; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15234; AVX512VPOPCNTDQVL-NEXT: retq 15235; 15236; BITALG_NOVLX-LABEL: ugt_26_v4i32: 15237; BITALG_NOVLX: # %bb.0: 15238; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15239; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 15240; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 15241; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15242; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15243; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15244; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15245; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15246; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] 15247; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15248; BITALG_NOVLX-NEXT: vzeroupper 15249; BITALG_NOVLX-NEXT: retq 15250; 15251; BITALG-LABEL: ugt_26_v4i32: 15252; BITALG: # %bb.0: 15253; BITALG-NEXT: vpopcntb %xmm0, %xmm0 15254; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 15255; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15256; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15257; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15258; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15259; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15260; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 15261; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15262; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15263; BITALG-NEXT: retq 15264 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 15265 %3 = icmp ugt <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26> 15266 %4 = sext <4 x i1> %3 to <4 x i32> 15267 ret <4 x i32> %4 15268} 15269 15270define <4 x i32> @ult_27_v4i32(<4 x i32> %0) { 15271; 
SSE2-LABEL: ult_27_v4i32: 15272; SSE2: # %bb.0: 15273; SSE2-NEXT: movdqa %xmm0, %xmm1 15274; SSE2-NEXT: psrlw $1, %xmm1 15275; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15276; SSE2-NEXT: psubb %xmm1, %xmm0 15277; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15278; SSE2-NEXT: movdqa %xmm0, %xmm2 15279; SSE2-NEXT: pand %xmm1, %xmm2 15280; SSE2-NEXT: psrlw $2, %xmm0 15281; SSE2-NEXT: pand %xmm1, %xmm0 15282; SSE2-NEXT: paddb %xmm2, %xmm0 15283; SSE2-NEXT: movdqa %xmm0, %xmm1 15284; SSE2-NEXT: psrlw $4, %xmm1 15285; SSE2-NEXT: paddb %xmm0, %xmm1 15286; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15287; SSE2-NEXT: pxor %xmm0, %xmm0 15288; SSE2-NEXT: movdqa %xmm1, %xmm2 15289; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15290; SSE2-NEXT: psadbw %xmm0, %xmm2 15291; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15292; SSE2-NEXT: psadbw %xmm0, %xmm1 15293; SSE2-NEXT: packuswb %xmm2, %xmm1 15294; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] 15295; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 15296; SSE2-NEXT: retq 15297; 15298; SSE3-LABEL: ult_27_v4i32: 15299; SSE3: # %bb.0: 15300; SSE3-NEXT: movdqa %xmm0, %xmm1 15301; SSE3-NEXT: psrlw $1, %xmm1 15302; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15303; SSE3-NEXT: psubb %xmm1, %xmm0 15304; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15305; SSE3-NEXT: movdqa %xmm0, %xmm2 15306; SSE3-NEXT: pand %xmm1, %xmm2 15307; SSE3-NEXT: psrlw $2, %xmm0 15308; SSE3-NEXT: pand %xmm1, %xmm0 15309; SSE3-NEXT: paddb %xmm2, %xmm0 15310; SSE3-NEXT: movdqa %xmm0, %xmm1 15311; SSE3-NEXT: psrlw $4, %xmm1 15312; SSE3-NEXT: paddb %xmm0, %xmm1 15313; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15314; SSE3-NEXT: pxor %xmm0, %xmm0 15315; SSE3-NEXT: movdqa %xmm1, %xmm2 15316; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15317; SSE3-NEXT: psadbw %xmm0, %xmm2 15318; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15319; 
SSE3-NEXT: psadbw %xmm0, %xmm1 15320; SSE3-NEXT: packuswb %xmm2, %xmm1 15321; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] 15322; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 15323; SSE3-NEXT: retq 15324; 15325; SSSE3-LABEL: ult_27_v4i32: 15326; SSSE3: # %bb.0: 15327; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15328; SSSE3-NEXT: movdqa %xmm0, %xmm2 15329; SSSE3-NEXT: pand %xmm1, %xmm2 15330; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15331; SSSE3-NEXT: movdqa %xmm3, %xmm4 15332; SSSE3-NEXT: pshufb %xmm2, %xmm4 15333; SSSE3-NEXT: psrlw $4, %xmm0 15334; SSSE3-NEXT: pand %xmm1, %xmm0 15335; SSSE3-NEXT: pshufb %xmm0, %xmm3 15336; SSSE3-NEXT: paddb %xmm4, %xmm3 15337; SSSE3-NEXT: pxor %xmm0, %xmm0 15338; SSSE3-NEXT: movdqa %xmm3, %xmm1 15339; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 15340; SSSE3-NEXT: psadbw %xmm0, %xmm1 15341; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 15342; SSSE3-NEXT: psadbw %xmm0, %xmm3 15343; SSSE3-NEXT: packuswb %xmm1, %xmm3 15344; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] 15345; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 15346; SSSE3-NEXT: retq 15347; 15348; SSE41-LABEL: ult_27_v4i32: 15349; SSE41: # %bb.0: 15350; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15351; SSE41-NEXT: movdqa %xmm0, %xmm2 15352; SSE41-NEXT: pand %xmm1, %xmm2 15353; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15354; SSE41-NEXT: movdqa %xmm3, %xmm4 15355; SSE41-NEXT: pshufb %xmm2, %xmm4 15356; SSE41-NEXT: psrlw $4, %xmm0 15357; SSE41-NEXT: pand %xmm1, %xmm0 15358; SSE41-NEXT: pshufb %xmm0, %xmm3 15359; SSE41-NEXT: paddb %xmm4, %xmm3 15360; SSE41-NEXT: pxor %xmm0, %xmm0 15361; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 15362; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 15363; SSE41-NEXT: psadbw %xmm0, %xmm3 15364; SSE41-NEXT: psadbw %xmm0, %xmm1 15365; 
SSE41-NEXT: packuswb %xmm3, %xmm1 15366; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] 15367; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 15368; SSE41-NEXT: retq 15369; 15370; AVX1-LABEL: ult_27_v4i32: 15371; AVX1: # %bb.0: 15372; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15373; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 15374; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15375; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15376; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 15377; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 15378; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15379; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15380; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 15381; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15382; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15383; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15384; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15385; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15386; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27] 15387; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15388; AVX1-NEXT: retq 15389; 15390; AVX2-LABEL: ult_27_v4i32: 15391; AVX2: # %bb.0: 15392; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15393; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 15394; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15395; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15396; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 15397; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 15398; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15399; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15400; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 15401; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15402; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15403; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15404; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15405; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15406; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] 
15407; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15408; AVX2-NEXT: retq 15409; 15410; AVX512VPOPCNTDQ-LABEL: ult_27_v4i32: 15411; AVX512VPOPCNTDQ: # %bb.0: 15412; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15413; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 15414; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] 15415; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15416; AVX512VPOPCNTDQ-NEXT: vzeroupper 15417; AVX512VPOPCNTDQ-NEXT: retq 15418; 15419; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i32: 15420; AVX512VPOPCNTDQVL: # %bb.0: 15421; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 15422; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 15423; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15424; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15425; AVX512VPOPCNTDQVL-NEXT: retq 15426; 15427; BITALG_NOVLX-LABEL: ult_27_v4i32: 15428; BITALG_NOVLX: # %bb.0: 15429; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15430; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 15431; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 15432; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15433; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15434; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15435; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15436; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15437; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] 15438; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15439; BITALG_NOVLX-NEXT: vzeroupper 15440; BITALG_NOVLX-NEXT: retq 15441; 15442; BITALG-LABEL: ult_27_v4i32: 15443; BITALG: # %bb.0: 15444; BITALG-NEXT: vpopcntb %xmm0, %xmm0 15445; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 15446; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15447; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15448; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15449; BITALG-NEXT: vpsadbw 
%xmm1, %xmm0, %xmm0 15450; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15451; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 15452; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15453; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15454; BITALG-NEXT: retq 15455 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 15456 %3 = icmp ult <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27> 15457 %4 = sext <4 x i1> %3 to <4 x i32> 15458 ret <4 x i32> %4 15459} 15460 15461define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) { 15462; SSE2-LABEL: ugt_27_v4i32: 15463; SSE2: # %bb.0: 15464; SSE2-NEXT: movdqa %xmm0, %xmm1 15465; SSE2-NEXT: psrlw $1, %xmm1 15466; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15467; SSE2-NEXT: psubb %xmm1, %xmm0 15468; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15469; SSE2-NEXT: movdqa %xmm0, %xmm2 15470; SSE2-NEXT: pand %xmm1, %xmm2 15471; SSE2-NEXT: psrlw $2, %xmm0 15472; SSE2-NEXT: pand %xmm1, %xmm0 15473; SSE2-NEXT: paddb %xmm2, %xmm0 15474; SSE2-NEXT: movdqa %xmm0, %xmm1 15475; SSE2-NEXT: psrlw $4, %xmm1 15476; SSE2-NEXT: paddb %xmm0, %xmm1 15477; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15478; SSE2-NEXT: pxor %xmm0, %xmm0 15479; SSE2-NEXT: movdqa %xmm1, %xmm2 15480; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15481; SSE2-NEXT: psadbw %xmm0, %xmm2 15482; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15483; SSE2-NEXT: psadbw %xmm0, %xmm1 15484; SSE2-NEXT: packuswb %xmm2, %xmm1 15485; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15486; SSE2-NEXT: movdqa %xmm1, %xmm0 15487; SSE2-NEXT: retq 15488; 15489; SSE3-LABEL: ugt_27_v4i32: 15490; SSE3: # %bb.0: 15491; SSE3-NEXT: movdqa %xmm0, %xmm1 15492; SSE3-NEXT: psrlw $1, %xmm1 15493; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15494; SSE3-NEXT: psubb %xmm1, %xmm0 15495; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15496; SSE3-NEXT: movdqa %xmm0, %xmm2 15497; SSE3-NEXT: pand %xmm1, %xmm2 15498; SSE3-NEXT: psrlw 
$2, %xmm0 15499; SSE3-NEXT: pand %xmm1, %xmm0 15500; SSE3-NEXT: paddb %xmm2, %xmm0 15501; SSE3-NEXT: movdqa %xmm0, %xmm1 15502; SSE3-NEXT: psrlw $4, %xmm1 15503; SSE3-NEXT: paddb %xmm0, %xmm1 15504; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15505; SSE3-NEXT: pxor %xmm0, %xmm0 15506; SSE3-NEXT: movdqa %xmm1, %xmm2 15507; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15508; SSE3-NEXT: psadbw %xmm0, %xmm2 15509; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15510; SSE3-NEXT: psadbw %xmm0, %xmm1 15511; SSE3-NEXT: packuswb %xmm2, %xmm1 15512; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15513; SSE3-NEXT: movdqa %xmm1, %xmm0 15514; SSE3-NEXT: retq 15515; 15516; SSSE3-LABEL: ugt_27_v4i32: 15517; SSSE3: # %bb.0: 15518; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15519; SSSE3-NEXT: movdqa %xmm0, %xmm3 15520; SSSE3-NEXT: pand %xmm2, %xmm3 15521; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15522; SSSE3-NEXT: movdqa %xmm1, %xmm4 15523; SSSE3-NEXT: pshufb %xmm3, %xmm4 15524; SSSE3-NEXT: psrlw $4, %xmm0 15525; SSSE3-NEXT: pand %xmm2, %xmm0 15526; SSSE3-NEXT: pshufb %xmm0, %xmm1 15527; SSSE3-NEXT: paddb %xmm4, %xmm1 15528; SSSE3-NEXT: pxor %xmm0, %xmm0 15529; SSSE3-NEXT: movdqa %xmm1, %xmm2 15530; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15531; SSSE3-NEXT: psadbw %xmm0, %xmm2 15532; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15533; SSSE3-NEXT: psadbw %xmm0, %xmm1 15534; SSSE3-NEXT: packuswb %xmm2, %xmm1 15535; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15536; SSSE3-NEXT: movdqa %xmm1, %xmm0 15537; SSSE3-NEXT: retq 15538; 15539; SSE41-LABEL: ugt_27_v4i32: 15540; SSE41: # %bb.0: 15541; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15542; SSE41-NEXT: movdqa %xmm0, %xmm2 15543; SSE41-NEXT: pand %xmm1, %xmm2 15544; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15545; 
SSE41-NEXT: movdqa %xmm3, %xmm4 15546; SSE41-NEXT: pshufb %xmm2, %xmm4 15547; SSE41-NEXT: psrlw $4, %xmm0 15548; SSE41-NEXT: pand %xmm1, %xmm0 15549; SSE41-NEXT: pshufb %xmm0, %xmm3 15550; SSE41-NEXT: paddb %xmm4, %xmm3 15551; SSE41-NEXT: pxor %xmm1, %xmm1 15552; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 15553; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 15554; SSE41-NEXT: psadbw %xmm1, %xmm3 15555; SSE41-NEXT: psadbw %xmm1, %xmm0 15556; SSE41-NEXT: packuswb %xmm3, %xmm0 15557; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 15558; SSE41-NEXT: retq 15559; 15560; AVX1-LABEL: ugt_27_v4i32: 15561; AVX1: # %bb.0: 15562; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15563; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 15564; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15565; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15566; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 15567; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 15568; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15569; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15570; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 15571; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15572; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15573; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15574; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15575; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15576; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 15577; AVX1-NEXT: retq 15578; 15579; AVX2-LABEL: ugt_27_v4i32: 15580; AVX2: # %bb.0: 15581; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15582; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 15583; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15584; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15585; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 15586; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 15587; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15588; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 
15589; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 15590; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15591; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15592; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15593; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15594; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15595; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] 15596; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15597; AVX2-NEXT: retq 15598; 15599; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i32: 15600; AVX512VPOPCNTDQ: # %bb.0: 15601; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15602; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 15603; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] 15604; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15605; AVX512VPOPCNTDQ-NEXT: vzeroupper 15606; AVX512VPOPCNTDQ-NEXT: retq 15607; 15608; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i32: 15609; AVX512VPOPCNTDQVL: # %bb.0: 15610; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 15611; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 15612; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15613; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15614; AVX512VPOPCNTDQVL-NEXT: retq 15615; 15616; BITALG_NOVLX-LABEL: ugt_27_v4i32: 15617; BITALG_NOVLX: # %bb.0: 15618; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15619; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 15620; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 15621; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15622; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15623; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15624; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15625; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15626; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] 15627; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15628; BITALG_NOVLX-NEXT: vzeroupper 15629; 
BITALG_NOVLX-NEXT: retq 15630; 15631; BITALG-LABEL: ugt_27_v4i32: 15632; BITALG: # %bb.0: 15633; BITALG-NEXT: vpopcntb %xmm0, %xmm0 15634; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 15635; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15636; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15637; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15638; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15639; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15640; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 15641; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15642; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15643; BITALG-NEXT: retq 15644 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 15645 %3 = icmp ugt <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27> 15646 %4 = sext <4 x i1> %3 to <4 x i32> 15647 ret <4 x i32> %4 15648} 15649 15650define <4 x i32> @ult_28_v4i32(<4 x i32> %0) { 15651; SSE2-LABEL: ult_28_v4i32: 15652; SSE2: # %bb.0: 15653; SSE2-NEXT: movdqa %xmm0, %xmm1 15654; SSE2-NEXT: psrlw $1, %xmm1 15655; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15656; SSE2-NEXT: psubb %xmm1, %xmm0 15657; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15658; SSE2-NEXT: movdqa %xmm0, %xmm2 15659; SSE2-NEXT: pand %xmm1, %xmm2 15660; SSE2-NEXT: psrlw $2, %xmm0 15661; SSE2-NEXT: pand %xmm1, %xmm0 15662; SSE2-NEXT: paddb %xmm2, %xmm0 15663; SSE2-NEXT: movdqa %xmm0, %xmm1 15664; SSE2-NEXT: psrlw $4, %xmm1 15665; SSE2-NEXT: paddb %xmm0, %xmm1 15666; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15667; SSE2-NEXT: pxor %xmm0, %xmm0 15668; SSE2-NEXT: movdqa %xmm1, %xmm2 15669; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15670; SSE2-NEXT: psadbw %xmm0, %xmm2 15671; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15672; SSE2-NEXT: psadbw %xmm0, %xmm1 15673; SSE2-NEXT: packuswb %xmm2, %xmm1 15674; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] 15675; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 15676; 
SSE2-NEXT: retq 15677; 15678; SSE3-LABEL: ult_28_v4i32: 15679; SSE3: # %bb.0: 15680; SSE3-NEXT: movdqa %xmm0, %xmm1 15681; SSE3-NEXT: psrlw $1, %xmm1 15682; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15683; SSE3-NEXT: psubb %xmm1, %xmm0 15684; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15685; SSE3-NEXT: movdqa %xmm0, %xmm2 15686; SSE3-NEXT: pand %xmm1, %xmm2 15687; SSE3-NEXT: psrlw $2, %xmm0 15688; SSE3-NEXT: pand %xmm1, %xmm0 15689; SSE3-NEXT: paddb %xmm2, %xmm0 15690; SSE3-NEXT: movdqa %xmm0, %xmm1 15691; SSE3-NEXT: psrlw $4, %xmm1 15692; SSE3-NEXT: paddb %xmm0, %xmm1 15693; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15694; SSE3-NEXT: pxor %xmm0, %xmm0 15695; SSE3-NEXT: movdqa %xmm1, %xmm2 15696; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15697; SSE3-NEXT: psadbw %xmm0, %xmm2 15698; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15699; SSE3-NEXT: psadbw %xmm0, %xmm1 15700; SSE3-NEXT: packuswb %xmm2, %xmm1 15701; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] 15702; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 15703; SSE3-NEXT: retq 15704; 15705; SSSE3-LABEL: ult_28_v4i32: 15706; SSSE3: # %bb.0: 15707; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15708; SSSE3-NEXT: movdqa %xmm0, %xmm2 15709; SSSE3-NEXT: pand %xmm1, %xmm2 15710; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15711; SSSE3-NEXT: movdqa %xmm3, %xmm4 15712; SSSE3-NEXT: pshufb %xmm2, %xmm4 15713; SSSE3-NEXT: psrlw $4, %xmm0 15714; SSSE3-NEXT: pand %xmm1, %xmm0 15715; SSSE3-NEXT: pshufb %xmm0, %xmm3 15716; SSSE3-NEXT: paddb %xmm4, %xmm3 15717; SSSE3-NEXT: pxor %xmm0, %xmm0 15718; SSSE3-NEXT: movdqa %xmm3, %xmm1 15719; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 15720; SSSE3-NEXT: psadbw %xmm0, %xmm1 15721; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 15722; SSSE3-NEXT: psadbw %xmm0, %xmm3 15723; SSSE3-NEXT: packuswb %xmm1, %xmm3 
15724; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] 15725; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 15726; SSSE3-NEXT: retq 15727; 15728; SSE41-LABEL: ult_28_v4i32: 15729; SSE41: # %bb.0: 15730; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15731; SSE41-NEXT: movdqa %xmm0, %xmm2 15732; SSE41-NEXT: pand %xmm1, %xmm2 15733; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15734; SSE41-NEXT: movdqa %xmm3, %xmm4 15735; SSE41-NEXT: pshufb %xmm2, %xmm4 15736; SSE41-NEXT: psrlw $4, %xmm0 15737; SSE41-NEXT: pand %xmm1, %xmm0 15738; SSE41-NEXT: pshufb %xmm0, %xmm3 15739; SSE41-NEXT: paddb %xmm4, %xmm3 15740; SSE41-NEXT: pxor %xmm0, %xmm0 15741; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 15742; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 15743; SSE41-NEXT: psadbw %xmm0, %xmm3 15744; SSE41-NEXT: psadbw %xmm0, %xmm1 15745; SSE41-NEXT: packuswb %xmm3, %xmm1 15746; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] 15747; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 15748; SSE41-NEXT: retq 15749; 15750; AVX1-LABEL: ult_28_v4i32: 15751; AVX1: # %bb.0: 15752; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15753; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 15754; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15755; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15756; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 15757; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 15758; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15759; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15760; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 15761; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15762; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15763; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15764; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15765; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15766; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28] 15767; AVX1-NEXT: vpcmpgtd %xmm0, 
%xmm1, %xmm0 15768; AVX1-NEXT: retq 15769; 15770; AVX2-LABEL: ult_28_v4i32: 15771; AVX2: # %bb.0: 15772; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15773; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 15774; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15775; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15776; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 15777; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 15778; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15779; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15780; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 15781; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15782; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15783; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15784; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15785; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15786; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] 15787; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15788; AVX2-NEXT: retq 15789; 15790; AVX512VPOPCNTDQ-LABEL: ult_28_v4i32: 15791; AVX512VPOPCNTDQ: # %bb.0: 15792; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15793; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 15794; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] 15795; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15796; AVX512VPOPCNTDQ-NEXT: vzeroupper 15797; AVX512VPOPCNTDQ-NEXT: retq 15798; 15799; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i32: 15800; AVX512VPOPCNTDQVL: # %bb.0: 15801; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 15802; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 15803; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15804; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15805; AVX512VPOPCNTDQVL-NEXT: retq 15806; 15807; BITALG_NOVLX-LABEL: ult_28_v4i32: 15808; BITALG_NOVLX: # %bb.0: 15809; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15810; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 15811; BITALG_NOVLX-NEXT: 
vpxor %xmm1, %xmm1, %xmm1 15812; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15813; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15814; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15815; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15816; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15817; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] 15818; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 15819; BITALG_NOVLX-NEXT: vzeroupper 15820; BITALG_NOVLX-NEXT: retq 15821; 15822; BITALG-LABEL: ult_28_v4i32: 15823; BITALG: # %bb.0: 15824; BITALG-NEXT: vpopcntb %xmm0, %xmm0 15825; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 15826; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15827; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15828; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15829; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15830; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15831; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 15832; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15833; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15834; BITALG-NEXT: retq 15835 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 15836 %3 = icmp ult <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28> 15837 %4 = sext <4 x i1> %3 to <4 x i32> 15838 ret <4 x i32> %4 15839} 15840 15841define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) { 15842; SSE2-LABEL: ugt_28_v4i32: 15843; SSE2: # %bb.0: 15844; SSE2-NEXT: movdqa %xmm0, %xmm1 15845; SSE2-NEXT: psrlw $1, %xmm1 15846; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15847; SSE2-NEXT: psubb %xmm1, %xmm0 15848; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15849; SSE2-NEXT: movdqa %xmm0, %xmm2 15850; SSE2-NEXT: pand %xmm1, %xmm2 15851; SSE2-NEXT: psrlw $2, %xmm0 15852; SSE2-NEXT: pand %xmm1, %xmm0 15853; SSE2-NEXT: paddb %xmm2, %xmm0 15854; SSE2-NEXT: movdqa %xmm0, %xmm1 15855; SSE2-NEXT: psrlw $4, %xmm1 
15856; SSE2-NEXT: paddb %xmm0, %xmm1 15857; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 15858; SSE2-NEXT: pxor %xmm0, %xmm0 15859; SSE2-NEXT: movdqa %xmm1, %xmm2 15860; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15861; SSE2-NEXT: psadbw %xmm0, %xmm2 15862; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15863; SSE2-NEXT: psadbw %xmm0, %xmm1 15864; SSE2-NEXT: packuswb %xmm2, %xmm1 15865; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15866; SSE2-NEXT: movdqa %xmm1, %xmm0 15867; SSE2-NEXT: retq 15868; 15869; SSE3-LABEL: ugt_28_v4i32: 15870; SSE3: # %bb.0: 15871; SSE3-NEXT: movdqa %xmm0, %xmm1 15872; SSE3-NEXT: psrlw $1, %xmm1 15873; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15874; SSE3-NEXT: psubb %xmm1, %xmm0 15875; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 15876; SSE3-NEXT: movdqa %xmm0, %xmm2 15877; SSE3-NEXT: pand %xmm1, %xmm2 15878; SSE3-NEXT: psrlw $2, %xmm0 15879; SSE3-NEXT: pand %xmm1, %xmm0 15880; SSE3-NEXT: paddb %xmm2, %xmm0 15881; SSE3-NEXT: movdqa %xmm0, %xmm1 15882; SSE3-NEXT: psrlw $4, %xmm1 15883; SSE3-NEXT: paddb %xmm0, %xmm1 15884; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 15885; SSE3-NEXT: pxor %xmm0, %xmm0 15886; SSE3-NEXT: movdqa %xmm1, %xmm2 15887; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15888; SSE3-NEXT: psadbw %xmm0, %xmm2 15889; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15890; SSE3-NEXT: psadbw %xmm0, %xmm1 15891; SSE3-NEXT: packuswb %xmm2, %xmm1 15892; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15893; SSE3-NEXT: movdqa %xmm1, %xmm0 15894; SSE3-NEXT: retq 15895; 15896; SSSE3-LABEL: ugt_28_v4i32: 15897; SSSE3: # %bb.0: 15898; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15899; SSSE3-NEXT: movdqa %xmm0, %xmm3 15900; SSSE3-NEXT: pand %xmm2, %xmm3 15901; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15902; SSSE3-NEXT: movdqa %xmm1, %xmm4 15903; SSSE3-NEXT: pshufb %xmm3, 
%xmm4 15904; SSSE3-NEXT: psrlw $4, %xmm0 15905; SSSE3-NEXT: pand %xmm2, %xmm0 15906; SSSE3-NEXT: pshufb %xmm0, %xmm1 15907; SSSE3-NEXT: paddb %xmm4, %xmm1 15908; SSSE3-NEXT: pxor %xmm0, %xmm0 15909; SSSE3-NEXT: movdqa %xmm1, %xmm2 15910; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 15911; SSSE3-NEXT: psadbw %xmm0, %xmm2 15912; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 15913; SSSE3-NEXT: psadbw %xmm0, %xmm1 15914; SSSE3-NEXT: packuswb %xmm2, %xmm1 15915; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 15916; SSSE3-NEXT: movdqa %xmm1, %xmm0 15917; SSSE3-NEXT: retq 15918; 15919; SSE41-LABEL: ugt_28_v4i32: 15920; SSE41: # %bb.0: 15921; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15922; SSE41-NEXT: movdqa %xmm0, %xmm2 15923; SSE41-NEXT: pand %xmm1, %xmm2 15924; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15925; SSE41-NEXT: movdqa %xmm3, %xmm4 15926; SSE41-NEXT: pshufb %xmm2, %xmm4 15927; SSE41-NEXT: psrlw $4, %xmm0 15928; SSE41-NEXT: pand %xmm1, %xmm0 15929; SSE41-NEXT: pshufb %xmm0, %xmm3 15930; SSE41-NEXT: paddb %xmm4, %xmm3 15931; SSE41-NEXT: pxor %xmm1, %xmm1 15932; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 15933; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 15934; SSE41-NEXT: psadbw %xmm1, %xmm3 15935; SSE41-NEXT: psadbw %xmm1, %xmm0 15936; SSE41-NEXT: packuswb %xmm3, %xmm0 15937; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 15938; SSE41-NEXT: retq 15939; 15940; AVX1-LABEL: ugt_28_v4i32: 15941; AVX1: # %bb.0: 15942; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15943; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 15944; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15945; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15946; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 15947; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 15948; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15949; AVX1-NEXT: vpaddb 
%xmm2, %xmm0, %xmm0 15950; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 15951; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15952; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15953; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15954; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15955; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15956; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 15957; AVX1-NEXT: retq 15958; 15959; AVX2-LABEL: ugt_28_v4i32: 15960; AVX2: # %bb.0: 15961; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 15962; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 15963; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 15964; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 15965; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 15966; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 15967; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 15968; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 15969; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 15970; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 15971; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 15972; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 15973; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 15974; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 15975; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] 15976; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15977; AVX2-NEXT: retq 15978; 15979; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i32: 15980; AVX512VPOPCNTDQ: # %bb.0: 15981; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15982; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 15983; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] 15984; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 15985; AVX512VPOPCNTDQ-NEXT: vzeroupper 15986; AVX512VPOPCNTDQ-NEXT: retq 15987; 15988; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i32: 15989; AVX512VPOPCNTDQVL: # %bb.0: 15990; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 15991; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, 
%xmm0, %k1 15992; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 15993; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 15994; AVX512VPOPCNTDQVL-NEXT: retq 15995; 15996; BITALG_NOVLX-LABEL: ugt_28_v4i32: 15997; BITALG_NOVLX: # %bb.0: 15998; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15999; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 16000; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 16001; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16002; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16003; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16004; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16005; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16006; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] 16007; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 16008; BITALG_NOVLX-NEXT: vzeroupper 16009; BITALG_NOVLX-NEXT: retq 16010; 16011; BITALG-LABEL: ugt_28_v4i32: 16012; BITALG: # %bb.0: 16013; BITALG-NEXT: vpopcntb %xmm0, %xmm0 16014; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 16015; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16016; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16017; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16018; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16019; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16020; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 16021; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16022; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16023; BITALG-NEXT: retq 16024 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 16025 %3 = icmp ugt <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28> 16026 %4 = sext <4 x i1> %3 to <4 x i32> 16027 ret <4 x i32> %4 16028} 16029 16030define <4 x i32> @ult_29_v4i32(<4 x i32> %0) { 16031; SSE2-LABEL: ult_29_v4i32: 16032; SSE2: # %bb.0: 16033; SSE2-NEXT: movdqa %xmm0, %xmm1 16034; SSE2-NEXT: psrlw $1, %xmm1 16035; SSE2-NEXT: pand {{.*}}(%rip), 
%xmm1 16036; SSE2-NEXT: psubb %xmm1, %xmm0 16037; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16038; SSE2-NEXT: movdqa %xmm0, %xmm2 16039; SSE2-NEXT: pand %xmm1, %xmm2 16040; SSE2-NEXT: psrlw $2, %xmm0 16041; SSE2-NEXT: pand %xmm1, %xmm0 16042; SSE2-NEXT: paddb %xmm2, %xmm0 16043; SSE2-NEXT: movdqa %xmm0, %xmm1 16044; SSE2-NEXT: psrlw $4, %xmm1 16045; SSE2-NEXT: paddb %xmm0, %xmm1 16046; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16047; SSE2-NEXT: pxor %xmm0, %xmm0 16048; SSE2-NEXT: movdqa %xmm1, %xmm2 16049; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16050; SSE2-NEXT: psadbw %xmm0, %xmm2 16051; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16052; SSE2-NEXT: psadbw %xmm0, %xmm1 16053; SSE2-NEXT: packuswb %xmm2, %xmm1 16054; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] 16055; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 16056; SSE2-NEXT: retq 16057; 16058; SSE3-LABEL: ult_29_v4i32: 16059; SSE3: # %bb.0: 16060; SSE3-NEXT: movdqa %xmm0, %xmm1 16061; SSE3-NEXT: psrlw $1, %xmm1 16062; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16063; SSE3-NEXT: psubb %xmm1, %xmm0 16064; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16065; SSE3-NEXT: movdqa %xmm0, %xmm2 16066; SSE3-NEXT: pand %xmm1, %xmm2 16067; SSE3-NEXT: psrlw $2, %xmm0 16068; SSE3-NEXT: pand %xmm1, %xmm0 16069; SSE3-NEXT: paddb %xmm2, %xmm0 16070; SSE3-NEXT: movdqa %xmm0, %xmm1 16071; SSE3-NEXT: psrlw $4, %xmm1 16072; SSE3-NEXT: paddb %xmm0, %xmm1 16073; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16074; SSE3-NEXT: pxor %xmm0, %xmm0 16075; SSE3-NEXT: movdqa %xmm1, %xmm2 16076; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16077; SSE3-NEXT: psadbw %xmm0, %xmm2 16078; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16079; SSE3-NEXT: psadbw %xmm0, %xmm1 16080; SSE3-NEXT: packuswb %xmm2, %xmm1 16081; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] 16082; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 
16083; SSE3-NEXT: retq 16084; 16085; SSSE3-LABEL: ult_29_v4i32: 16086; SSSE3: # %bb.0: 16087; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16088; SSSE3-NEXT: movdqa %xmm0, %xmm2 16089; SSSE3-NEXT: pand %xmm1, %xmm2 16090; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16091; SSSE3-NEXT: movdqa %xmm3, %xmm4 16092; SSSE3-NEXT: pshufb %xmm2, %xmm4 16093; SSSE3-NEXT: psrlw $4, %xmm0 16094; SSSE3-NEXT: pand %xmm1, %xmm0 16095; SSSE3-NEXT: pshufb %xmm0, %xmm3 16096; SSSE3-NEXT: paddb %xmm4, %xmm3 16097; SSSE3-NEXT: pxor %xmm0, %xmm0 16098; SSSE3-NEXT: movdqa %xmm3, %xmm1 16099; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 16100; SSSE3-NEXT: psadbw %xmm0, %xmm1 16101; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 16102; SSSE3-NEXT: psadbw %xmm0, %xmm3 16103; SSSE3-NEXT: packuswb %xmm1, %xmm3 16104; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] 16105; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 16106; SSSE3-NEXT: retq 16107; 16108; SSE41-LABEL: ult_29_v4i32: 16109; SSE41: # %bb.0: 16110; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16111; SSE41-NEXT: movdqa %xmm0, %xmm2 16112; SSE41-NEXT: pand %xmm1, %xmm2 16113; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16114; SSE41-NEXT: movdqa %xmm3, %xmm4 16115; SSE41-NEXT: pshufb %xmm2, %xmm4 16116; SSE41-NEXT: psrlw $4, %xmm0 16117; SSE41-NEXT: pand %xmm1, %xmm0 16118; SSE41-NEXT: pshufb %xmm0, %xmm3 16119; SSE41-NEXT: paddb %xmm4, %xmm3 16120; SSE41-NEXT: pxor %xmm0, %xmm0 16121; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 16122; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 16123; SSE41-NEXT: psadbw %xmm0, %xmm3 16124; SSE41-NEXT: psadbw %xmm0, %xmm1 16125; SSE41-NEXT: packuswb %xmm3, %xmm1 16126; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] 16127; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 16128; SSE41-NEXT: retq 16129; 16130; 
AVX1-LABEL: ult_29_v4i32: 16131; AVX1: # %bb.0: 16132; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16133; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 16134; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16135; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16136; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 16137; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 16138; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16139; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16140; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 16141; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16142; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16143; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16144; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16145; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16146; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29] 16147; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16148; AVX1-NEXT: retq 16149; 16150; AVX2-LABEL: ult_29_v4i32: 16151; AVX2: # %bb.0: 16152; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16153; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 16154; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16155; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16156; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 16157; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 16158; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16159; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16160; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 16161; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16162; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16163; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16164; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16165; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16166; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] 16167; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16168; AVX2-NEXT: retq 16169; 16170; AVX512VPOPCNTDQ-LABEL: ult_29_v4i32: 16171; AVX512VPOPCNTDQ: # %bb.0: 16172; 
AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16173; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 16174; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] 16175; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16176; AVX512VPOPCNTDQ-NEXT: vzeroupper 16177; AVX512VPOPCNTDQ-NEXT: retq 16178; 16179; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i32: 16180; AVX512VPOPCNTDQVL: # %bb.0: 16181; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 16182; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 16183; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16184; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16185; AVX512VPOPCNTDQVL-NEXT: retq 16186; 16187; BITALG_NOVLX-LABEL: ult_29_v4i32: 16188; BITALG_NOVLX: # %bb.0: 16189; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16190; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 16191; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 16192; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16193; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16194; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16195; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16196; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16197; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] 16198; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16199; BITALG_NOVLX-NEXT: vzeroupper 16200; BITALG_NOVLX-NEXT: retq 16201; 16202; BITALG-LABEL: ult_29_v4i32: 16203; BITALG: # %bb.0: 16204; BITALG-NEXT: vpopcntb %xmm0, %xmm0 16205; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 16206; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16207; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16208; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16209; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16210; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16211; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 16212; BITALG-NEXT: vpcmpeqd 
%xmm0, %xmm0, %xmm0 16213; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16214; BITALG-NEXT: retq 16215 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 16216 %3 = icmp ult <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29> 16217 %4 = sext <4 x i1> %3 to <4 x i32> 16218 ret <4 x i32> %4 16219} 16220 16221define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) { 16222; SSE2-LABEL: ugt_29_v4i32: 16223; SSE2: # %bb.0: 16224; SSE2-NEXT: movdqa %xmm0, %xmm1 16225; SSE2-NEXT: psrlw $1, %xmm1 16226; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16227; SSE2-NEXT: psubb %xmm1, %xmm0 16228; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16229; SSE2-NEXT: movdqa %xmm0, %xmm2 16230; SSE2-NEXT: pand %xmm1, %xmm2 16231; SSE2-NEXT: psrlw $2, %xmm0 16232; SSE2-NEXT: pand %xmm1, %xmm0 16233; SSE2-NEXT: paddb %xmm2, %xmm0 16234; SSE2-NEXT: movdqa %xmm0, %xmm1 16235; SSE2-NEXT: psrlw $4, %xmm1 16236; SSE2-NEXT: paddb %xmm0, %xmm1 16237; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16238; SSE2-NEXT: pxor %xmm0, %xmm0 16239; SSE2-NEXT: movdqa %xmm1, %xmm2 16240; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16241; SSE2-NEXT: psadbw %xmm0, %xmm2 16242; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16243; SSE2-NEXT: psadbw %xmm0, %xmm1 16244; SSE2-NEXT: packuswb %xmm2, %xmm1 16245; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 16246; SSE2-NEXT: movdqa %xmm1, %xmm0 16247; SSE2-NEXT: retq 16248; 16249; SSE3-LABEL: ugt_29_v4i32: 16250; SSE3: # %bb.0: 16251; SSE3-NEXT: movdqa %xmm0, %xmm1 16252; SSE3-NEXT: psrlw $1, %xmm1 16253; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16254; SSE3-NEXT: psubb %xmm1, %xmm0 16255; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16256; SSE3-NEXT: movdqa %xmm0, %xmm2 16257; SSE3-NEXT: pand %xmm1, %xmm2 16258; SSE3-NEXT: psrlw $2, %xmm0 16259; SSE3-NEXT: pand %xmm1, %xmm0 16260; SSE3-NEXT: paddb %xmm2, %xmm0 16261; SSE3-NEXT: movdqa %xmm0, %xmm1 16262; SSE3-NEXT: psrlw $4, %xmm1 
16263; SSE3-NEXT: paddb %xmm0, %xmm1 16264; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16265; SSE3-NEXT: pxor %xmm0, %xmm0 16266; SSE3-NEXT: movdqa %xmm1, %xmm2 16267; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16268; SSE3-NEXT: psadbw %xmm0, %xmm2 16269; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16270; SSE3-NEXT: psadbw %xmm0, %xmm1 16271; SSE3-NEXT: packuswb %xmm2, %xmm1 16272; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 16273; SSE3-NEXT: movdqa %xmm1, %xmm0 16274; SSE3-NEXT: retq 16275; 16276; SSSE3-LABEL: ugt_29_v4i32: 16277; SSSE3: # %bb.0: 16278; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16279; SSSE3-NEXT: movdqa %xmm0, %xmm3 16280; SSSE3-NEXT: pand %xmm2, %xmm3 16281; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16282; SSSE3-NEXT: movdqa %xmm1, %xmm4 16283; SSSE3-NEXT: pshufb %xmm3, %xmm4 16284; SSSE3-NEXT: psrlw $4, %xmm0 16285; SSSE3-NEXT: pand %xmm2, %xmm0 16286; SSSE3-NEXT: pshufb %xmm0, %xmm1 16287; SSSE3-NEXT: paddb %xmm4, %xmm1 16288; SSSE3-NEXT: pxor %xmm0, %xmm0 16289; SSSE3-NEXT: movdqa %xmm1, %xmm2 16290; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16291; SSSE3-NEXT: psadbw %xmm0, %xmm2 16292; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16293; SSSE3-NEXT: psadbw %xmm0, %xmm1 16294; SSSE3-NEXT: packuswb %xmm2, %xmm1 16295; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 16296; SSSE3-NEXT: movdqa %xmm1, %xmm0 16297; SSSE3-NEXT: retq 16298; 16299; SSE41-LABEL: ugt_29_v4i32: 16300; SSE41: # %bb.0: 16301; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16302; SSE41-NEXT: movdqa %xmm0, %xmm2 16303; SSE41-NEXT: pand %xmm1, %xmm2 16304; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16305; SSE41-NEXT: movdqa %xmm3, %xmm4 16306; SSE41-NEXT: pshufb %xmm2, %xmm4 16307; SSE41-NEXT: psrlw $4, %xmm0 16308; SSE41-NEXT: pand %xmm1, %xmm0 16309; SSE41-NEXT: 
pshufb %xmm0, %xmm3 16310; SSE41-NEXT: paddb %xmm4, %xmm3 16311; SSE41-NEXT: pxor %xmm1, %xmm1 16312; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 16313; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 16314; SSE41-NEXT: psadbw %xmm1, %xmm3 16315; SSE41-NEXT: psadbw %xmm1, %xmm0 16316; SSE41-NEXT: packuswb %xmm3, %xmm0 16317; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 16318; SSE41-NEXT: retq 16319; 16320; AVX1-LABEL: ugt_29_v4i32: 16321; AVX1: # %bb.0: 16322; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16323; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 16324; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16325; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16326; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 16327; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 16328; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16329; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16330; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 16331; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16332; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16333; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16334; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16335; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16336; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 16337; AVX1-NEXT: retq 16338; 16339; AVX2-LABEL: ugt_29_v4i32: 16340; AVX2: # %bb.0: 16341; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16342; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 16343; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16344; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16345; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 16346; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 16347; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16348; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16349; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 16350; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16351; AVX2-NEXT: vpsadbw %xmm1, %xmm2, 
%xmm2 16352; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16353; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16354; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16355; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] 16356; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 16357; AVX2-NEXT: retq 16358; 16359; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i32: 16360; AVX512VPOPCNTDQ: # %bb.0: 16361; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16362; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 16363; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] 16364; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 16365; AVX512VPOPCNTDQ-NEXT: vzeroupper 16366; AVX512VPOPCNTDQ-NEXT: retq 16367; 16368; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i32: 16369; AVX512VPOPCNTDQVL: # %bb.0: 16370; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 16371; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 16372; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16373; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16374; AVX512VPOPCNTDQVL-NEXT: retq 16375; 16376; BITALG_NOVLX-LABEL: ugt_29_v4i32: 16377; BITALG_NOVLX: # %bb.0: 16378; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16379; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 16380; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 16381; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16382; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16383; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16384; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16385; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16386; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] 16387; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 16388; BITALG_NOVLX-NEXT: vzeroupper 16389; BITALG_NOVLX-NEXT: retq 16390; 16391; BITALG-LABEL: ugt_29_v4i32: 16392; BITALG: # %bb.0: 16393; BITALG-NEXT: vpopcntb %xmm0, %xmm0 16394; BITALG-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 16395; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16396; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16397; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16398; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16399; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16400; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 16401; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16402; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16403; BITALG-NEXT: retq 16404 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 16405 %3 = icmp ugt <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29> 16406 %4 = sext <4 x i1> %3 to <4 x i32> 16407 ret <4 x i32> %4 16408} 16409 16410define <4 x i32> @ult_30_v4i32(<4 x i32> %0) { 16411; SSE2-LABEL: ult_30_v4i32: 16412; SSE2: # %bb.0: 16413; SSE2-NEXT: movdqa %xmm0, %xmm1 16414; SSE2-NEXT: psrlw $1, %xmm1 16415; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16416; SSE2-NEXT: psubb %xmm1, %xmm0 16417; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16418; SSE2-NEXT: movdqa %xmm0, %xmm2 16419; SSE2-NEXT: pand %xmm1, %xmm2 16420; SSE2-NEXT: psrlw $2, %xmm0 16421; SSE2-NEXT: pand %xmm1, %xmm0 16422; SSE2-NEXT: paddb %xmm2, %xmm0 16423; SSE2-NEXT: movdqa %xmm0, %xmm1 16424; SSE2-NEXT: psrlw $4, %xmm1 16425; SSE2-NEXT: paddb %xmm0, %xmm1 16426; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16427; SSE2-NEXT: pxor %xmm0, %xmm0 16428; SSE2-NEXT: movdqa %xmm1, %xmm2 16429; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16430; SSE2-NEXT: psadbw %xmm0, %xmm2 16431; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16432; SSE2-NEXT: psadbw %xmm0, %xmm1 16433; SSE2-NEXT: packuswb %xmm2, %xmm1 16434; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] 16435; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 16436; SSE2-NEXT: retq 16437; 16438; SSE3-LABEL: ult_30_v4i32: 16439; SSE3: # %bb.0: 16440; SSE3-NEXT: movdqa %xmm0, %xmm1 16441; SSE3-NEXT: psrlw $1, %xmm1 16442; SSE3-NEXT: pand 
{{.*}}(%rip), %xmm1 16443; SSE3-NEXT: psubb %xmm1, %xmm0 16444; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16445; SSE3-NEXT: movdqa %xmm0, %xmm2 16446; SSE3-NEXT: pand %xmm1, %xmm2 16447; SSE3-NEXT: psrlw $2, %xmm0 16448; SSE3-NEXT: pand %xmm1, %xmm0 16449; SSE3-NEXT: paddb %xmm2, %xmm0 16450; SSE3-NEXT: movdqa %xmm0, %xmm1 16451; SSE3-NEXT: psrlw $4, %xmm1 16452; SSE3-NEXT: paddb %xmm0, %xmm1 16453; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16454; SSE3-NEXT: pxor %xmm0, %xmm0 16455; SSE3-NEXT: movdqa %xmm1, %xmm2 16456; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16457; SSE3-NEXT: psadbw %xmm0, %xmm2 16458; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16459; SSE3-NEXT: psadbw %xmm0, %xmm1 16460; SSE3-NEXT: packuswb %xmm2, %xmm1 16461; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] 16462; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 16463; SSE3-NEXT: retq 16464; 16465; SSSE3-LABEL: ult_30_v4i32: 16466; SSSE3: # %bb.0: 16467; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16468; SSSE3-NEXT: movdqa %xmm0, %xmm2 16469; SSSE3-NEXT: pand %xmm1, %xmm2 16470; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16471; SSSE3-NEXT: movdqa %xmm3, %xmm4 16472; SSSE3-NEXT: pshufb %xmm2, %xmm4 16473; SSSE3-NEXT: psrlw $4, %xmm0 16474; SSSE3-NEXT: pand %xmm1, %xmm0 16475; SSSE3-NEXT: pshufb %xmm0, %xmm3 16476; SSSE3-NEXT: paddb %xmm4, %xmm3 16477; SSSE3-NEXT: pxor %xmm0, %xmm0 16478; SSSE3-NEXT: movdqa %xmm3, %xmm1 16479; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 16480; SSSE3-NEXT: psadbw %xmm0, %xmm1 16481; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 16482; SSSE3-NEXT: psadbw %xmm0, %xmm3 16483; SSSE3-NEXT: packuswb %xmm1, %xmm3 16484; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] 16485; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 16486; SSSE3-NEXT: retq 16487; 16488; SSE41-LABEL: ult_30_v4i32: 16489; SSE41: 
# %bb.0: 16490; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16491; SSE41-NEXT: movdqa %xmm0, %xmm2 16492; SSE41-NEXT: pand %xmm1, %xmm2 16493; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16494; SSE41-NEXT: movdqa %xmm3, %xmm4 16495; SSE41-NEXT: pshufb %xmm2, %xmm4 16496; SSE41-NEXT: psrlw $4, %xmm0 16497; SSE41-NEXT: pand %xmm1, %xmm0 16498; SSE41-NEXT: pshufb %xmm0, %xmm3 16499; SSE41-NEXT: paddb %xmm4, %xmm3 16500; SSE41-NEXT: pxor %xmm0, %xmm0 16501; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 16502; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 16503; SSE41-NEXT: psadbw %xmm0, %xmm3 16504; SSE41-NEXT: psadbw %xmm0, %xmm1 16505; SSE41-NEXT: packuswb %xmm3, %xmm1 16506; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] 16507; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 16508; SSE41-NEXT: retq 16509; 16510; AVX1-LABEL: ult_30_v4i32: 16511; AVX1: # %bb.0: 16512; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16513; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 16514; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16515; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16516; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 16517; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 16518; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16519; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16520; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 16521; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16522; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16523; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16524; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16525; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16526; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30] 16527; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16528; AVX1-NEXT: retq 16529; 16530; AVX2-LABEL: ult_30_v4i32: 16531; AVX2: # %bb.0: 16532; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16533; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 16534; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16535; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16536; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 16537; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 16538; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16539; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16540; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 16541; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16542; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16543; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16544; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16545; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16546; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] 16547; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16548; AVX2-NEXT: retq 16549; 16550; AVX512VPOPCNTDQ-LABEL: ult_30_v4i32: 16551; AVX512VPOPCNTDQ: # %bb.0: 16552; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16553; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 16554; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] 16555; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16556; AVX512VPOPCNTDQ-NEXT: vzeroupper 16557; AVX512VPOPCNTDQ-NEXT: retq 16558; 16559; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i32: 16560; AVX512VPOPCNTDQVL: # %bb.0: 16561; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 16562; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 16563; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16564; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16565; AVX512VPOPCNTDQVL-NEXT: retq 16566; 16567; BITALG_NOVLX-LABEL: ult_30_v4i32: 16568; BITALG_NOVLX: # %bb.0: 16569; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16570; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 16571; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 16572; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16573; BITALG_NOVLX-NEXT: 
vpsadbw %xmm1, %xmm2, %xmm2 16574; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16575; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16576; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16577; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] 16578; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16579; BITALG_NOVLX-NEXT: vzeroupper 16580; BITALG_NOVLX-NEXT: retq 16581; 16582; BITALG-LABEL: ult_30_v4i32: 16583; BITALG: # %bb.0: 16584; BITALG-NEXT: vpopcntb %xmm0, %xmm0 16585; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 16586; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16587; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16588; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16589; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16590; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16591; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 16592; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16593; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16594; BITALG-NEXT: retq 16595 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 16596 %3 = icmp ult <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30> 16597 %4 = sext <4 x i1> %3 to <4 x i32> 16598 ret <4 x i32> %4 16599} 16600 16601define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) { 16602; SSE2-LABEL: ugt_30_v4i32: 16603; SSE2: # %bb.0: 16604; SSE2-NEXT: movdqa %xmm0, %xmm1 16605; SSE2-NEXT: psrlw $1, %xmm1 16606; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16607; SSE2-NEXT: psubb %xmm1, %xmm0 16608; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16609; SSE2-NEXT: movdqa %xmm0, %xmm2 16610; SSE2-NEXT: pand %xmm1, %xmm2 16611; SSE2-NEXT: psrlw $2, %xmm0 16612; SSE2-NEXT: pand %xmm1, %xmm0 16613; SSE2-NEXT: paddb %xmm2, %xmm0 16614; SSE2-NEXT: movdqa %xmm0, %xmm1 16615; SSE2-NEXT: psrlw $4, %xmm1 16616; SSE2-NEXT: paddb %xmm0, %xmm1 16617; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16618; SSE2-NEXT: pxor %xmm0, %xmm0 16619; SSE2-NEXT: movdqa 
%xmm1, %xmm2 16620; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16621; SSE2-NEXT: psadbw %xmm0, %xmm2 16622; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16623; SSE2-NEXT: psadbw %xmm0, %xmm1 16624; SSE2-NEXT: packuswb %xmm2, %xmm1 16625; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 16626; SSE2-NEXT: movdqa %xmm1, %xmm0 16627; SSE2-NEXT: retq 16628; 16629; SSE3-LABEL: ugt_30_v4i32: 16630; SSE3: # %bb.0: 16631; SSE3-NEXT: movdqa %xmm0, %xmm1 16632; SSE3-NEXT: psrlw $1, %xmm1 16633; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16634; SSE3-NEXT: psubb %xmm1, %xmm0 16635; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16636; SSE3-NEXT: movdqa %xmm0, %xmm2 16637; SSE3-NEXT: pand %xmm1, %xmm2 16638; SSE3-NEXT: psrlw $2, %xmm0 16639; SSE3-NEXT: pand %xmm1, %xmm0 16640; SSE3-NEXT: paddb %xmm2, %xmm0 16641; SSE3-NEXT: movdqa %xmm0, %xmm1 16642; SSE3-NEXT: psrlw $4, %xmm1 16643; SSE3-NEXT: paddb %xmm0, %xmm1 16644; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16645; SSE3-NEXT: pxor %xmm0, %xmm0 16646; SSE3-NEXT: movdqa %xmm1, %xmm2 16647; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16648; SSE3-NEXT: psadbw %xmm0, %xmm2 16649; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16650; SSE3-NEXT: psadbw %xmm0, %xmm1 16651; SSE3-NEXT: packuswb %xmm2, %xmm1 16652; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 16653; SSE3-NEXT: movdqa %xmm1, %xmm0 16654; SSE3-NEXT: retq 16655; 16656; SSSE3-LABEL: ugt_30_v4i32: 16657; SSSE3: # %bb.0: 16658; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16659; SSSE3-NEXT: movdqa %xmm0, %xmm3 16660; SSSE3-NEXT: pand %xmm2, %xmm3 16661; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16662; SSSE3-NEXT: movdqa %xmm1, %xmm4 16663; SSSE3-NEXT: pshufb %xmm3, %xmm4 16664; SSSE3-NEXT: psrlw $4, %xmm0 16665; SSSE3-NEXT: pand %xmm2, %xmm0 16666; SSSE3-NEXT: pshufb %xmm0, %xmm1 16667; SSSE3-NEXT: paddb 
%xmm4, %xmm1 16668; SSSE3-NEXT: pxor %xmm0, %xmm0 16669; SSSE3-NEXT: movdqa %xmm1, %xmm2 16670; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16671; SSSE3-NEXT: psadbw %xmm0, %xmm2 16672; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16673; SSSE3-NEXT: psadbw %xmm0, %xmm1 16674; SSSE3-NEXT: packuswb %xmm2, %xmm1 16675; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 16676; SSSE3-NEXT: movdqa %xmm1, %xmm0 16677; SSSE3-NEXT: retq 16678; 16679; SSE41-LABEL: ugt_30_v4i32: 16680; SSE41: # %bb.0: 16681; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16682; SSE41-NEXT: movdqa %xmm0, %xmm2 16683; SSE41-NEXT: pand %xmm1, %xmm2 16684; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16685; SSE41-NEXT: movdqa %xmm3, %xmm4 16686; SSE41-NEXT: pshufb %xmm2, %xmm4 16687; SSE41-NEXT: psrlw $4, %xmm0 16688; SSE41-NEXT: pand %xmm1, %xmm0 16689; SSE41-NEXT: pshufb %xmm0, %xmm3 16690; SSE41-NEXT: paddb %xmm4, %xmm3 16691; SSE41-NEXT: pxor %xmm1, %xmm1 16692; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 16693; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 16694; SSE41-NEXT: psadbw %xmm1, %xmm3 16695; SSE41-NEXT: psadbw %xmm1, %xmm0 16696; SSE41-NEXT: packuswb %xmm3, %xmm0 16697; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 16698; SSE41-NEXT: retq 16699; 16700; AVX1-LABEL: ugt_30_v4i32: 16701; AVX1: # %bb.0: 16702; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16703; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 16704; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16705; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16706; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 16707; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 16708; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16709; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16710; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 16711; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 
16712; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16713; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16714; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16715; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16716; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 16717; AVX1-NEXT: retq 16718; 16719; AVX2-LABEL: ugt_30_v4i32: 16720; AVX2: # %bb.0: 16721; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16722; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 16723; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16724; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16725; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 16726; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 16727; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16728; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16729; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 16730; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16731; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16732; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16733; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16734; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16735; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] 16736; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 16737; AVX2-NEXT: retq 16738; 16739; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i32: 16740; AVX512VPOPCNTDQ: # %bb.0: 16741; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16742; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 16743; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] 16744; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 16745; AVX512VPOPCNTDQ-NEXT: vzeroupper 16746; AVX512VPOPCNTDQ-NEXT: retq 16747; 16748; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i32: 16749; AVX512VPOPCNTDQVL: # %bb.0: 16750; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 16751; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 16752; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16753; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16754; 
AVX512VPOPCNTDQVL-NEXT: retq 16755; 16756; BITALG_NOVLX-LABEL: ugt_30_v4i32: 16757; BITALG_NOVLX: # %bb.0: 16758; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16759; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 16760; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 16761; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16762; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16763; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16764; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16765; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16766; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] 16767; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 16768; BITALG_NOVLX-NEXT: vzeroupper 16769; BITALG_NOVLX-NEXT: retq 16770; 16771; BITALG-LABEL: ugt_30_v4i32: 16772; BITALG: # %bb.0: 16773; BITALG-NEXT: vpopcntb %xmm0, %xmm0 16774; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 16775; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16776; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16777; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16778; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16779; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16780; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 16781; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16782; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16783; BITALG-NEXT: retq 16784 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 16785 %3 = icmp ugt <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30> 16786 %4 = sext <4 x i1> %3 to <4 x i32> 16787 ret <4 x i32> %4 16788} 16789 16790define <4 x i32> @ult_31_v4i32(<4 x i32> %0) { 16791; SSE2-LABEL: ult_31_v4i32: 16792; SSE2: # %bb.0: 16793; SSE2-NEXT: movdqa %xmm0, %xmm1 16794; SSE2-NEXT: psrlw $1, %xmm1 16795; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16796; SSE2-NEXT: psubb %xmm1, %xmm0 16797; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16798; 
SSE2-NEXT: movdqa %xmm0, %xmm2 16799; SSE2-NEXT: pand %xmm1, %xmm2 16800; SSE2-NEXT: psrlw $2, %xmm0 16801; SSE2-NEXT: pand %xmm1, %xmm0 16802; SSE2-NEXT: paddb %xmm2, %xmm0 16803; SSE2-NEXT: movdqa %xmm0, %xmm1 16804; SSE2-NEXT: psrlw $4, %xmm1 16805; SSE2-NEXT: paddb %xmm0, %xmm1 16806; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 16807; SSE2-NEXT: pxor %xmm0, %xmm0 16808; SSE2-NEXT: movdqa %xmm1, %xmm2 16809; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16810; SSE2-NEXT: psadbw %xmm0, %xmm2 16811; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16812; SSE2-NEXT: psadbw %xmm0, %xmm1 16813; SSE2-NEXT: packuswb %xmm2, %xmm1 16814; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] 16815; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 16816; SSE2-NEXT: retq 16817; 16818; SSE3-LABEL: ult_31_v4i32: 16819; SSE3: # %bb.0: 16820; SSE3-NEXT: movdqa %xmm0, %xmm1 16821; SSE3-NEXT: psrlw $1, %xmm1 16822; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16823; SSE3-NEXT: psubb %xmm1, %xmm0 16824; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 16825; SSE3-NEXT: movdqa %xmm0, %xmm2 16826; SSE3-NEXT: pand %xmm1, %xmm2 16827; SSE3-NEXT: psrlw $2, %xmm0 16828; SSE3-NEXT: pand %xmm1, %xmm0 16829; SSE3-NEXT: paddb %xmm2, %xmm0 16830; SSE3-NEXT: movdqa %xmm0, %xmm1 16831; SSE3-NEXT: psrlw $4, %xmm1 16832; SSE3-NEXT: paddb %xmm0, %xmm1 16833; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 16834; SSE3-NEXT: pxor %xmm0, %xmm0 16835; SSE3-NEXT: movdqa %xmm1, %xmm2 16836; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 16837; SSE3-NEXT: psadbw %xmm0, %xmm2 16838; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 16839; SSE3-NEXT: psadbw %xmm0, %xmm1 16840; SSE3-NEXT: packuswb %xmm2, %xmm1 16841; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] 16842; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 16843; SSE3-NEXT: retq 16844; 16845; SSSE3-LABEL: ult_31_v4i32: 16846; SSSE3: # %bb.0: 16847; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16848; SSSE3-NEXT: movdqa %xmm0, %xmm2 16849; SSSE3-NEXT: pand %xmm1, %xmm2 16850; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16851; SSSE3-NEXT: movdqa %xmm3, %xmm4 16852; SSSE3-NEXT: pshufb %xmm2, %xmm4 16853; SSSE3-NEXT: psrlw $4, %xmm0 16854; SSSE3-NEXT: pand %xmm1, %xmm0 16855; SSSE3-NEXT: pshufb %xmm0, %xmm3 16856; SSSE3-NEXT: paddb %xmm4, %xmm3 16857; SSSE3-NEXT: pxor %xmm0, %xmm0 16858; SSSE3-NEXT: movdqa %xmm3, %xmm1 16859; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 16860; SSSE3-NEXT: psadbw %xmm0, %xmm1 16861; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 16862; SSSE3-NEXT: psadbw %xmm0, %xmm3 16863; SSSE3-NEXT: packuswb %xmm1, %xmm3 16864; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] 16865; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 16866; SSSE3-NEXT: retq 16867; 16868; SSE41-LABEL: ult_31_v4i32: 16869; SSE41: # %bb.0: 16870; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16871; SSE41-NEXT: movdqa %xmm0, %xmm2 16872; SSE41-NEXT: pand %xmm1, %xmm2 16873; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16874; SSE41-NEXT: movdqa %xmm3, %xmm4 16875; SSE41-NEXT: pshufb %xmm2, %xmm4 16876; SSE41-NEXT: psrlw $4, %xmm0 16877; SSE41-NEXT: pand %xmm1, %xmm0 16878; SSE41-NEXT: pshufb %xmm0, %xmm3 16879; SSE41-NEXT: paddb %xmm4, %xmm3 16880; SSE41-NEXT: pxor %xmm0, %xmm0 16881; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero 16882; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] 16883; SSE41-NEXT: psadbw %xmm0, %xmm3 16884; SSE41-NEXT: psadbw %xmm0, %xmm1 16885; SSE41-NEXT: packuswb %xmm3, %xmm1 16886; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] 16887; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 16888; SSE41-NEXT: retq 16889; 16890; AVX1-LABEL: ult_31_v4i32: 16891; AVX1: # %bb.0: 16892; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16893; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 16894; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16895; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16896; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 16897; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 16898; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16899; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16900; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 16901; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16902; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16903; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16904; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16905; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16906; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31,31,31] 16907; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16908; AVX1-NEXT: retq 16909; 16910; AVX2-LABEL: ult_31_v4i32: 16911; AVX2: # %bb.0: 16912; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 16913; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 16914; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 16915; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 16916; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 16917; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 16918; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 16919; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 16920; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 16921; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16922; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16923; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16924; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16925; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16926; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] 16927; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16928; AVX2-NEXT: retq 16929; 16930; AVX512VPOPCNTDQ-LABEL: ult_31_v4i32: 16931; AVX512VPOPCNTDQ: # %bb.0: 16932; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16933; AVX512VPOPCNTDQ-NEXT: 
vpopcntd %zmm0, %zmm0 16934; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] 16935; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16936; AVX512VPOPCNTDQ-NEXT: vzeroupper 16937; AVX512VPOPCNTDQ-NEXT: retq 16938; 16939; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i32: 16940; AVX512VPOPCNTDQVL: # %bb.0: 16941; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 16942; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 16943; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16944; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16945; AVX512VPOPCNTDQVL-NEXT: retq 16946; 16947; BITALG_NOVLX-LABEL: ult_31_v4i32: 16948; BITALG_NOVLX: # %bb.0: 16949; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16950; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 16951; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 16952; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16953; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16954; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16955; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16956; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16957; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] 16958; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 16959; BITALG_NOVLX-NEXT: vzeroupper 16960; BITALG_NOVLX-NEXT: retq 16961; 16962; BITALG-LABEL: ult_31_v4i32: 16963; BITALG: # %bb.0: 16964; BITALG-NEXT: vpopcntb %xmm0, %xmm0 16965; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 16966; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 16967; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 16968; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 16969; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 16970; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 16971; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 16972; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 16973; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 16974; BITALG-NEXT: 
retq 16975 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) 16976 %3 = icmp ult <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31> 16977 %4 = sext <4 x i1> %3 to <4 x i32> 16978 ret <4 x i32> %4 16979} 16980 16981define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) { 16982; SSE2-LABEL: ugt_1_v2i64: 16983; SSE2: # %bb.0: 16984; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 16985; SSE2-NEXT: movdqa %xmm0, %xmm2 16986; SSE2-NEXT: paddq %xmm1, %xmm2 16987; SSE2-NEXT: pand %xmm0, %xmm2 16988; SSE2-NEXT: pxor %xmm3, %xmm3 16989; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 16990; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] 16991; SSE2-NEXT: pand %xmm3, %xmm0 16992; SSE2-NEXT: pxor %xmm1, %xmm0 16993; SSE2-NEXT: retq 16994; 16995; SSE3-LABEL: ugt_1_v2i64: 16996; SSE3: # %bb.0: 16997; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 16998; SSE3-NEXT: movdqa %xmm0, %xmm2 16999; SSE3-NEXT: paddq %xmm1, %xmm2 17000; SSE3-NEXT: pand %xmm0, %xmm2 17001; SSE3-NEXT: pxor %xmm3, %xmm3 17002; SSE3-NEXT: pcmpeqd %xmm2, %xmm3 17003; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] 17004; SSE3-NEXT: pand %xmm3, %xmm0 17005; SSE3-NEXT: pxor %xmm1, %xmm0 17006; SSE3-NEXT: retq 17007; 17008; SSSE3-LABEL: ugt_1_v2i64: 17009; SSSE3: # %bb.0: 17010; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 17011; SSSE3-NEXT: movdqa %xmm0, %xmm2 17012; SSSE3-NEXT: paddq %xmm1, %xmm2 17013; SSSE3-NEXT: pand %xmm0, %xmm2 17014; SSSE3-NEXT: pxor %xmm3, %xmm3 17015; SSSE3-NEXT: pcmpeqd %xmm2, %xmm3 17016; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] 17017; SSSE3-NEXT: pand %xmm3, %xmm0 17018; SSSE3-NEXT: pxor %xmm1, %xmm0 17019; SSSE3-NEXT: retq 17020; 17021; SSE41-LABEL: ugt_1_v2i64: 17022; SSE41: # %bb.0: 17023; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 17024; SSE41-NEXT: movdqa %xmm0, %xmm1 17025; SSE41-NEXT: paddq %xmm2, %xmm1 17026; SSE41-NEXT: pand %xmm0, %xmm1 17027; SSE41-NEXT: pxor %xmm0, %xmm0 17028; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 17029; SSE41-NEXT: pxor %xmm2, %xmm1 17030; SSE41-NEXT: movdqa %xmm1, %xmm0 17031; SSE41-NEXT: retq 17032; 17033; AVX1-LABEL: 
ugt_1_v2i64: 17034; AVX1: # %bb.0: 17035; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17036; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm2 17037; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 17038; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 17039; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 17040; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 17041; AVX1-NEXT: retq 17042; 17043; AVX2-LABEL: ugt_1_v2i64: 17044; AVX2: # %bb.0: 17045; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17046; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2 17047; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 17048; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 17049; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 17050; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 17051; AVX2-NEXT: retq 17052; 17053; AVX512VPOPCNTDQ-LABEL: ugt_1_v2i64: 17054; AVX512VPOPCNTDQ: # %bb.0: 17055; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17056; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 17057; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17058; AVX512VPOPCNTDQ-NEXT: vzeroupper 17059; AVX512VPOPCNTDQ-NEXT: retq 17060; 17061; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64: 17062; AVX512VPOPCNTDQVL: # %bb.0: 17063; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 17064; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 17065; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17066; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17067; AVX512VPOPCNTDQVL-NEXT: retq 17068; 17069; BITALG_NOVLX-LABEL: ugt_1_v2i64: 17070; BITALG_NOVLX: # %bb.0: 17071; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17072; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 17073; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 17074; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 17075; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 17076; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 17077; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 17078; BITALG_NOVLX-NEXT: vzeroupper 17079; BITALG_NOVLX-NEXT: retq 17080; 17081; BITALG-LABEL: ugt_1_v2i64: 17082; BITALG: # 
%bb.0: 17083; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17084; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1 17085; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 17086; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 17087; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 17088; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 17089; BITALG-NEXT: retq 17090 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 17091 %3 = icmp ugt <2 x i64> %2, <i64 1, i64 1> 17092 %4 = sext <2 x i1> %3 to <2 x i64> 17093 ret <2 x i64> %4 17094} 17095 17096define <2 x i64> @ult_2_v2i64(<2 x i64> %0) { 17097; SSE2-LABEL: ult_2_v2i64: 17098; SSE2: # %bb.0: 17099; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 17100; SSE2-NEXT: paddq %xmm0, %xmm1 17101; SSE2-NEXT: pand %xmm0, %xmm1 17102; SSE2-NEXT: pxor %xmm2, %xmm2 17103; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 17104; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] 17105; SSE2-NEXT: pand %xmm2, %xmm0 17106; SSE2-NEXT: retq 17107; 17108; SSE3-LABEL: ult_2_v2i64: 17109; SSE3: # %bb.0: 17110; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 17111; SSE3-NEXT: paddq %xmm0, %xmm1 17112; SSE3-NEXT: pand %xmm0, %xmm1 17113; SSE3-NEXT: pxor %xmm2, %xmm2 17114; SSE3-NEXT: pcmpeqd %xmm1, %xmm2 17115; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] 17116; SSE3-NEXT: pand %xmm2, %xmm0 17117; SSE3-NEXT: retq 17118; 17119; SSSE3-LABEL: ult_2_v2i64: 17120; SSSE3: # %bb.0: 17121; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 17122; SSSE3-NEXT: paddq %xmm0, %xmm1 17123; SSSE3-NEXT: pand %xmm0, %xmm1 17124; SSSE3-NEXT: pxor %xmm2, %xmm2 17125; SSSE3-NEXT: pcmpeqd %xmm1, %xmm2 17126; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] 17127; SSSE3-NEXT: pand %xmm2, %xmm0 17128; SSSE3-NEXT: retq 17129; 17130; SSE41-LABEL: ult_2_v2i64: 17131; SSE41: # %bb.0: 17132; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 17133; SSE41-NEXT: paddq %xmm0, %xmm1 17134; SSE41-NEXT: pand %xmm1, %xmm0 17135; SSE41-NEXT: pxor %xmm1, %xmm1 17136; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 17137; SSE41-NEXT: retq 17138; 17139; AVX1-LABEL: ult_2_v2i64: 17140; AVX1: # %bb.0: 
17141; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17142; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 17143; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 17144; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 17145; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 17146; AVX1-NEXT: retq 17147; 17148; AVX2-LABEL: ult_2_v2i64: 17149; AVX2: # %bb.0: 17150; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17151; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 17152; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 17153; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 17154; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 17155; AVX2-NEXT: retq 17156; 17157; AVX512VPOPCNTDQ-LABEL: ult_2_v2i64: 17158; AVX512VPOPCNTDQ: # %bb.0: 17159; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17160; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 17161; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2] 17162; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17163; AVX512VPOPCNTDQ-NEXT: vzeroupper 17164; AVX512VPOPCNTDQ-NEXT: retq 17165; 17166; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64: 17167; AVX512VPOPCNTDQVL: # %bb.0: 17168; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 17169; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 17170; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17171; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17172; AVX512VPOPCNTDQVL-NEXT: retq 17173; 17174; BITALG_NOVLX-LABEL: ult_2_v2i64: 17175; BITALG_NOVLX: # %bb.0: 17176; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17177; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 17178; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 17179; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 17180; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 17181; BITALG_NOVLX-NEXT: retq 17182; 17183; BITALG-LABEL: ult_2_v2i64: 17184; BITALG: # %bb.0: 17185; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 17186; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1 17187; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 17188; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 17189; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 
17190; BITALG-NEXT: retq 17191 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 17192 %3 = icmp ult <2 x i64> %2, <i64 2, i64 2> 17193 %4 = sext <2 x i1> %3 to <2 x i64> 17194 ret <2 x i64> %4 17195} 17196 17197define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) { 17198; SSE2-LABEL: ugt_2_v2i64: 17199; SSE2: # %bb.0: 17200; SSE2-NEXT: movdqa %xmm0, %xmm1 17201; SSE2-NEXT: psrlw $1, %xmm1 17202; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17203; SSE2-NEXT: psubb %xmm1, %xmm0 17204; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17205; SSE2-NEXT: movdqa %xmm0, %xmm2 17206; SSE2-NEXT: pand %xmm1, %xmm2 17207; SSE2-NEXT: psrlw $2, %xmm0 17208; SSE2-NEXT: pand %xmm1, %xmm0 17209; SSE2-NEXT: paddb %xmm2, %xmm0 17210; SSE2-NEXT: movdqa %xmm0, %xmm1 17211; SSE2-NEXT: psrlw $4, %xmm1 17212; SSE2-NEXT: paddb %xmm0, %xmm1 17213; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17214; SSE2-NEXT: pxor %xmm0, %xmm0 17215; SSE2-NEXT: psadbw %xmm1, %xmm0 17216; SSE2-NEXT: por {{.*}}(%rip), %xmm0 17217; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] 17218; SSE2-NEXT: movdqa %xmm0, %xmm2 17219; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 17220; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17221; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 17222; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17223; SSE2-NEXT: pand %xmm3, %xmm1 17224; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17225; SSE2-NEXT: por %xmm1, %xmm0 17226; SSE2-NEXT: retq 17227; 17228; SSE3-LABEL: ugt_2_v2i64: 17229; SSE3: # %bb.0: 17230; SSE3-NEXT: movdqa %xmm0, %xmm1 17231; SSE3-NEXT: psrlw $1, %xmm1 17232; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17233; SSE3-NEXT: psubb %xmm1, %xmm0 17234; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17235; SSE3-NEXT: movdqa %xmm0, %xmm2 17236; SSE3-NEXT: pand %xmm1, %xmm2 17237; SSE3-NEXT: psrlw $2, %xmm0 17238; SSE3-NEXT: pand %xmm1, %xmm0 17239; SSE3-NEXT: paddb %xmm2, %xmm0 17240; SSE3-NEXT: movdqa %xmm0, %xmm1 17241; SSE3-NEXT: psrlw 
$4, %xmm1 17242; SSE3-NEXT: paddb %xmm0, %xmm1 17243; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17244; SSE3-NEXT: pxor %xmm0, %xmm0 17245; SSE3-NEXT: psadbw %xmm1, %xmm0 17246; SSE3-NEXT: por {{.*}}(%rip), %xmm0 17247; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] 17248; SSE3-NEXT: movdqa %xmm0, %xmm2 17249; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 17250; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17251; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 17252; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17253; SSE3-NEXT: pand %xmm3, %xmm1 17254; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17255; SSE3-NEXT: por %xmm1, %xmm0 17256; SSE3-NEXT: retq 17257; 17258; SSSE3-LABEL: ugt_2_v2i64: 17259; SSSE3: # %bb.0: 17260; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17261; SSSE3-NEXT: movdqa %xmm0, %xmm2 17262; SSSE3-NEXT: pand %xmm1, %xmm2 17263; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17264; SSSE3-NEXT: movdqa %xmm3, %xmm4 17265; SSSE3-NEXT: pshufb %xmm2, %xmm4 17266; SSSE3-NEXT: psrlw $4, %xmm0 17267; SSSE3-NEXT: pand %xmm1, %xmm0 17268; SSSE3-NEXT: pshufb %xmm0, %xmm3 17269; SSSE3-NEXT: paddb %xmm4, %xmm3 17270; SSSE3-NEXT: pxor %xmm0, %xmm0 17271; SSSE3-NEXT: psadbw %xmm3, %xmm0 17272; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 17273; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] 17274; SSSE3-NEXT: movdqa %xmm0, %xmm2 17275; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 17276; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17277; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 17278; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17279; SSSE3-NEXT: pand %xmm3, %xmm1 17280; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17281; SSSE3-NEXT: por %xmm1, %xmm0 17282; SSSE3-NEXT: retq 17283; 17284; SSE41-LABEL: ugt_2_v2i64: 17285; SSE41: # %bb.0: 17286; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17287; SSE41-NEXT: movdqa %xmm0, %xmm2 17288; SSE41-NEXT: pand %xmm1, %xmm2 17289; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17290; SSE41-NEXT: movdqa %xmm3, %xmm4 17291; SSE41-NEXT: pshufb %xmm2, %xmm4 17292; SSE41-NEXT: psrlw $4, %xmm0 17293; SSE41-NEXT: pand %xmm1, %xmm0 17294; SSE41-NEXT: pshufb %xmm0, %xmm3 17295; SSE41-NEXT: paddb %xmm4, %xmm3 17296; SSE41-NEXT: pxor %xmm0, %xmm0 17297; SSE41-NEXT: psadbw %xmm3, %xmm0 17298; SSE41-NEXT: por {{.*}}(%rip), %xmm0 17299; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] 17300; SSE41-NEXT: movdqa %xmm0, %xmm2 17301; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 17302; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17303; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 17304; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17305; SSE41-NEXT: pand %xmm3, %xmm1 17306; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17307; SSE41-NEXT: por %xmm1, %xmm0 17308; SSE41-NEXT: retq 17309; 17310; AVX1-LABEL: ugt_2_v2i64: 17311; AVX1: # %bb.0: 17312; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17313; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 17314; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17315; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17316; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 17317; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 17318; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17319; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 17320; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 17321; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17322; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17323; AVX1-NEXT: retq 17324; 17325; AVX2-LABEL: ugt_2_v2i64: 17326; AVX2: # %bb.0: 17327; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17328; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 17329; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17330; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17331; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 17332; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 17333; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17334; AVX2-NEXT: vpaddb %xmm2, 
%xmm0, %xmm0 17335; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 17336; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17337; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17338; AVX2-NEXT: retq 17339; 17340; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64: 17341; AVX512VPOPCNTDQ: # %bb.0: 17342; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17343; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 17344; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17345; AVX512VPOPCNTDQ-NEXT: vzeroupper 17346; AVX512VPOPCNTDQ-NEXT: retq 17347; 17348; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64: 17349; AVX512VPOPCNTDQVL: # %bb.0: 17350; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 17351; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 17352; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17353; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17354; AVX512VPOPCNTDQVL-NEXT: retq 17355; 17356; BITALG_NOVLX-LABEL: ugt_2_v2i64: 17357; BITALG_NOVLX: # %bb.0: 17358; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17359; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 17360; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 17361; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17362; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17363; BITALG_NOVLX-NEXT: vzeroupper 17364; BITALG_NOVLX-NEXT: retq 17365; 17366; BITALG-LABEL: ugt_2_v2i64: 17367; BITALG: # %bb.0: 17368; BITALG-NEXT: vpopcntb %xmm0, %xmm0 17369; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 17370; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17371; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 17372; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17373; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17374; BITALG-NEXT: retq 17375 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 17376 %3 = icmp ugt <2 x i64> %2, <i64 2, i64 2> 17377 %4 = sext <2 x i1> %3 to <2 x i64> 17378 ret <2 x i64> %4 17379} 17380 17381define <2 x i64> @ult_3_v2i64(<2 x i64> %0) { 17382; SSE2-LABEL: ult_3_v2i64: 17383; SSE2: # %bb.0: 
17384; SSE2-NEXT: movdqa %xmm0, %xmm1 17385; SSE2-NEXT: psrlw $1, %xmm1 17386; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17387; SSE2-NEXT: psubb %xmm1, %xmm0 17388; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17389; SSE2-NEXT: movdqa %xmm0, %xmm2 17390; SSE2-NEXT: pand %xmm1, %xmm2 17391; SSE2-NEXT: psrlw $2, %xmm0 17392; SSE2-NEXT: pand %xmm1, %xmm0 17393; SSE2-NEXT: paddb %xmm2, %xmm0 17394; SSE2-NEXT: movdqa %xmm0, %xmm1 17395; SSE2-NEXT: psrlw $4, %xmm1 17396; SSE2-NEXT: paddb %xmm0, %xmm1 17397; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17398; SSE2-NEXT: pxor %xmm0, %xmm0 17399; SSE2-NEXT: psadbw %xmm1, %xmm0 17400; SSE2-NEXT: por {{.*}}(%rip), %xmm0 17401; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] 17402; SSE2-NEXT: movdqa %xmm1, %xmm2 17403; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 17404; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17405; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 17406; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17407; SSE2-NEXT: pand %xmm3, %xmm1 17408; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17409; SSE2-NEXT: por %xmm1, %xmm0 17410; SSE2-NEXT: retq 17411; 17412; SSE3-LABEL: ult_3_v2i64: 17413; SSE3: # %bb.0: 17414; SSE3-NEXT: movdqa %xmm0, %xmm1 17415; SSE3-NEXT: psrlw $1, %xmm1 17416; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17417; SSE3-NEXT: psubb %xmm1, %xmm0 17418; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17419; SSE3-NEXT: movdqa %xmm0, %xmm2 17420; SSE3-NEXT: pand %xmm1, %xmm2 17421; SSE3-NEXT: psrlw $2, %xmm0 17422; SSE3-NEXT: pand %xmm1, %xmm0 17423; SSE3-NEXT: paddb %xmm2, %xmm0 17424; SSE3-NEXT: movdqa %xmm0, %xmm1 17425; SSE3-NEXT: psrlw $4, %xmm1 17426; SSE3-NEXT: paddb %xmm0, %xmm1 17427; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17428; SSE3-NEXT: pxor %xmm0, %xmm0 17429; SSE3-NEXT: psadbw %xmm1, %xmm0 17430; SSE3-NEXT: por {{.*}}(%rip), %xmm0 17431; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] 17432; SSE3-NEXT: movdqa %xmm1, %xmm2 17433; 
SSE3-NEXT: pcmpgtd %xmm0, %xmm2 17434; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17435; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 17436; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17437; SSE3-NEXT: pand %xmm3, %xmm1 17438; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17439; SSE3-NEXT: por %xmm1, %xmm0 17440; SSE3-NEXT: retq 17441; 17442; SSSE3-LABEL: ult_3_v2i64: 17443; SSSE3: # %bb.0: 17444; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17445; SSSE3-NEXT: movdqa %xmm0, %xmm2 17446; SSSE3-NEXT: pand %xmm1, %xmm2 17447; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17448; SSSE3-NEXT: movdqa %xmm3, %xmm4 17449; SSSE3-NEXT: pshufb %xmm2, %xmm4 17450; SSSE3-NEXT: psrlw $4, %xmm0 17451; SSSE3-NEXT: pand %xmm1, %xmm0 17452; SSSE3-NEXT: pshufb %xmm0, %xmm3 17453; SSSE3-NEXT: paddb %xmm4, %xmm3 17454; SSSE3-NEXT: pxor %xmm0, %xmm0 17455; SSSE3-NEXT: psadbw %xmm3, %xmm0 17456; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 17457; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] 17458; SSSE3-NEXT: movdqa %xmm1, %xmm2 17459; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 17460; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17461; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 17462; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17463; SSSE3-NEXT: pand %xmm3, %xmm1 17464; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17465; SSSE3-NEXT: por %xmm1, %xmm0 17466; SSSE3-NEXT: retq 17467; 17468; SSE41-LABEL: ult_3_v2i64: 17469; SSE41: # %bb.0: 17470; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17471; SSE41-NEXT: movdqa %xmm0, %xmm2 17472; SSE41-NEXT: pand %xmm1, %xmm2 17473; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17474; SSE41-NEXT: movdqa %xmm3, %xmm4 17475; SSE41-NEXT: pshufb %xmm2, %xmm4 17476; SSE41-NEXT: psrlw $4, %xmm0 17477; SSE41-NEXT: pand %xmm1, %xmm0 17478; SSE41-NEXT: pshufb %xmm0, %xmm3 17479; SSE41-NEXT: paddb %xmm4, %xmm3 17480; SSE41-NEXT: pxor %xmm0, 
%xmm0 17481; SSE41-NEXT: psadbw %xmm3, %xmm0 17482; SSE41-NEXT: por {{.*}}(%rip), %xmm0 17483; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] 17484; SSE41-NEXT: movdqa %xmm1, %xmm2 17485; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 17486; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17487; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 17488; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17489; SSE41-NEXT: pand %xmm3, %xmm1 17490; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17491; SSE41-NEXT: por %xmm1, %xmm0 17492; SSE41-NEXT: retq 17493; 17494; AVX1-LABEL: ult_3_v2i64: 17495; AVX1: # %bb.0: 17496; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17497; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 17498; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17499; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17500; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 17501; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 17502; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17503; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 17504; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 17505; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17506; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] 17507; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17508; AVX1-NEXT: retq 17509; 17510; AVX2-LABEL: ult_3_v2i64: 17511; AVX2: # %bb.0: 17512; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17513; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 17514; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17515; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17516; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 17517; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 17518; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17519; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 17520; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 17521; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17522; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] 17523; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17524; AVX2-NEXT: retq 17525; 17526; AVX512VPOPCNTDQ-LABEL: ult_3_v2i64: 
17527; AVX512VPOPCNTDQ: # %bb.0: 17528; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17529; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 17530; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] 17531; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17532; AVX512VPOPCNTDQ-NEXT: vzeroupper 17533; AVX512VPOPCNTDQ-NEXT: retq 17534; 17535; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64: 17536; AVX512VPOPCNTDQVL: # %bb.0: 17537; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 17538; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 17539; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17540; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17541; AVX512VPOPCNTDQVL-NEXT: retq 17542; 17543; BITALG_NOVLX-LABEL: ult_3_v2i64: 17544; BITALG_NOVLX: # %bb.0: 17545; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17546; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 17547; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 17548; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17549; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] 17550; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17551; BITALG_NOVLX-NEXT: vzeroupper 17552; BITALG_NOVLX-NEXT: retq 17553; 17554; BITALG-LABEL: ult_3_v2i64: 17555; BITALG: # %bb.0: 17556; BITALG-NEXT: vpopcntb %xmm0, %xmm0 17557; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 17558; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17559; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 17560; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17561; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17562; BITALG-NEXT: retq 17563 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 17564 %3 = icmp ult <2 x i64> %2, <i64 3, i64 3> 17565 %4 = sext <2 x i1> %3 to <2 x i64> 17566 ret <2 x i64> %4 17567} 17568 17569define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) { 17570; SSE2-LABEL: ugt_3_v2i64: 17571; SSE2: # %bb.0: 17572; SSE2-NEXT: movdqa %xmm0, %xmm1 17573; SSE2-NEXT: psrlw $1, %xmm1 17574; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17575; SSE2-NEXT: 
psubb %xmm1, %xmm0 17576; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17577; SSE2-NEXT: movdqa %xmm0, %xmm2 17578; SSE2-NEXT: pand %xmm1, %xmm2 17579; SSE2-NEXT: psrlw $2, %xmm0 17580; SSE2-NEXT: pand %xmm1, %xmm0 17581; SSE2-NEXT: paddb %xmm2, %xmm0 17582; SSE2-NEXT: movdqa %xmm0, %xmm1 17583; SSE2-NEXT: psrlw $4, %xmm1 17584; SSE2-NEXT: paddb %xmm0, %xmm1 17585; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17586; SSE2-NEXT: pxor %xmm0, %xmm0 17587; SSE2-NEXT: psadbw %xmm1, %xmm0 17588; SSE2-NEXT: por {{.*}}(%rip), %xmm0 17589; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] 17590; SSE2-NEXT: movdqa %xmm0, %xmm2 17591; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 17592; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17593; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 17594; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17595; SSE2-NEXT: pand %xmm3, %xmm1 17596; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17597; SSE2-NEXT: por %xmm1, %xmm0 17598; SSE2-NEXT: retq 17599; 17600; SSE3-LABEL: ugt_3_v2i64: 17601; SSE3: # %bb.0: 17602; SSE3-NEXT: movdqa %xmm0, %xmm1 17603; SSE3-NEXT: psrlw $1, %xmm1 17604; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17605; SSE3-NEXT: psubb %xmm1, %xmm0 17606; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17607; SSE3-NEXT: movdqa %xmm0, %xmm2 17608; SSE3-NEXT: pand %xmm1, %xmm2 17609; SSE3-NEXT: psrlw $2, %xmm0 17610; SSE3-NEXT: pand %xmm1, %xmm0 17611; SSE3-NEXT: paddb %xmm2, %xmm0 17612; SSE3-NEXT: movdqa %xmm0, %xmm1 17613; SSE3-NEXT: psrlw $4, %xmm1 17614; SSE3-NEXT: paddb %xmm0, %xmm1 17615; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17616; SSE3-NEXT: pxor %xmm0, %xmm0 17617; SSE3-NEXT: psadbw %xmm1, %xmm0 17618; SSE3-NEXT: por {{.*}}(%rip), %xmm0 17619; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] 17620; SSE3-NEXT: movdqa %xmm0, %xmm2 17621; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 17622; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17623; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 17624; 
SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17625; SSE3-NEXT: pand %xmm3, %xmm1 17626; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17627; SSE3-NEXT: por %xmm1, %xmm0 17628; SSE3-NEXT: retq 17629; 17630; SSSE3-LABEL: ugt_3_v2i64: 17631; SSSE3: # %bb.0: 17632; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17633; SSSE3-NEXT: movdqa %xmm0, %xmm2 17634; SSSE3-NEXT: pand %xmm1, %xmm2 17635; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17636; SSSE3-NEXT: movdqa %xmm3, %xmm4 17637; SSSE3-NEXT: pshufb %xmm2, %xmm4 17638; SSSE3-NEXT: psrlw $4, %xmm0 17639; SSSE3-NEXT: pand %xmm1, %xmm0 17640; SSSE3-NEXT: pshufb %xmm0, %xmm3 17641; SSSE3-NEXT: paddb %xmm4, %xmm3 17642; SSSE3-NEXT: pxor %xmm0, %xmm0 17643; SSSE3-NEXT: psadbw %xmm3, %xmm0 17644; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 17645; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] 17646; SSSE3-NEXT: movdqa %xmm0, %xmm2 17647; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 17648; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17649; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 17650; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17651; SSSE3-NEXT: pand %xmm3, %xmm1 17652; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17653; SSSE3-NEXT: por %xmm1, %xmm0 17654; SSSE3-NEXT: retq 17655; 17656; SSE41-LABEL: ugt_3_v2i64: 17657; SSE41: # %bb.0: 17658; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17659; SSE41-NEXT: movdqa %xmm0, %xmm2 17660; SSE41-NEXT: pand %xmm1, %xmm2 17661; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17662; SSE41-NEXT: movdqa %xmm3, %xmm4 17663; SSE41-NEXT: pshufb %xmm2, %xmm4 17664; SSE41-NEXT: psrlw $4, %xmm0 17665; SSE41-NEXT: pand %xmm1, %xmm0 17666; SSE41-NEXT: pshufb %xmm0, %xmm3 17667; SSE41-NEXT: paddb %xmm4, %xmm3 17668; SSE41-NEXT: pxor %xmm0, %xmm0 17669; SSE41-NEXT: psadbw %xmm3, %xmm0 17670; SSE41-NEXT: por {{.*}}(%rip), %xmm0 17671; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[2147483651,2147483651] 17672; SSE41-NEXT: movdqa %xmm0, %xmm2 17673; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 17674; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17675; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 17676; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17677; SSE41-NEXT: pand %xmm3, %xmm1 17678; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17679; SSE41-NEXT: por %xmm1, %xmm0 17680; SSE41-NEXT: retq 17681; 17682; AVX1-LABEL: ugt_3_v2i64: 17683; AVX1: # %bb.0: 17684; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17685; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 17686; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17687; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17688; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 17689; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 17690; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17691; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 17692; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 17693; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17694; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17695; AVX1-NEXT: retq 17696; 17697; AVX2-LABEL: ugt_3_v2i64: 17698; AVX2: # %bb.0: 17699; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17700; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 17701; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17702; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17703; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 17704; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 17705; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17706; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 17707; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 17708; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17709; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17710; AVX2-NEXT: retq 17711; 17712; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64: 17713; AVX512VPOPCNTDQ: # %bb.0: 17714; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17715; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 17716; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, 
%xmm0 17717; AVX512VPOPCNTDQ-NEXT: vzeroupper 17718; AVX512VPOPCNTDQ-NEXT: retq 17719; 17720; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64: 17721; AVX512VPOPCNTDQVL: # %bb.0: 17722; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 17723; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 17724; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17725; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17726; AVX512VPOPCNTDQVL-NEXT: retq 17727; 17728; BITALG_NOVLX-LABEL: ugt_3_v2i64: 17729; BITALG_NOVLX: # %bb.0: 17730; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17731; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 17732; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 17733; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17734; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 17735; BITALG_NOVLX-NEXT: vzeroupper 17736; BITALG_NOVLX-NEXT: retq 17737; 17738; BITALG-LABEL: ugt_3_v2i64: 17739; BITALG: # %bb.0: 17740; BITALG-NEXT: vpopcntb %xmm0, %xmm0 17741; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 17742; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17743; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 17744; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17745; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17746; BITALG-NEXT: retq 17747 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 17748 %3 = icmp ugt <2 x i64> %2, <i64 3, i64 3> 17749 %4 = sext <2 x i1> %3 to <2 x i64> 17750 ret <2 x i64> %4 17751} 17752 17753define <2 x i64> @ult_4_v2i64(<2 x i64> %0) { 17754; SSE2-LABEL: ult_4_v2i64: 17755; SSE2: # %bb.0: 17756; SSE2-NEXT: movdqa %xmm0, %xmm1 17757; SSE2-NEXT: psrlw $1, %xmm1 17758; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17759; SSE2-NEXT: psubb %xmm1, %xmm0 17760; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17761; SSE2-NEXT: movdqa %xmm0, %xmm2 17762; SSE2-NEXT: pand %xmm1, %xmm2 17763; SSE2-NEXT: psrlw $2, %xmm0 17764; SSE2-NEXT: pand %xmm1, %xmm0 17765; SSE2-NEXT: paddb %xmm2, %xmm0 17766; SSE2-NEXT: 
movdqa %xmm0, %xmm1 17767; SSE2-NEXT: psrlw $4, %xmm1 17768; SSE2-NEXT: paddb %xmm0, %xmm1 17769; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17770; SSE2-NEXT: pxor %xmm0, %xmm0 17771; SSE2-NEXT: psadbw %xmm1, %xmm0 17772; SSE2-NEXT: por {{.*}}(%rip), %xmm0 17773; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 17774; SSE2-NEXT: movdqa %xmm1, %xmm2 17775; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 17776; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17777; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 17778; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17779; SSE2-NEXT: pand %xmm3, %xmm1 17780; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17781; SSE2-NEXT: por %xmm1, %xmm0 17782; SSE2-NEXT: retq 17783; 17784; SSE3-LABEL: ult_4_v2i64: 17785; SSE3: # %bb.0: 17786; SSE3-NEXT: movdqa %xmm0, %xmm1 17787; SSE3-NEXT: psrlw $1, %xmm1 17788; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17789; SSE3-NEXT: psubb %xmm1, %xmm0 17790; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17791; SSE3-NEXT: movdqa %xmm0, %xmm2 17792; SSE3-NEXT: pand %xmm1, %xmm2 17793; SSE3-NEXT: psrlw $2, %xmm0 17794; SSE3-NEXT: pand %xmm1, %xmm0 17795; SSE3-NEXT: paddb %xmm2, %xmm0 17796; SSE3-NEXT: movdqa %xmm0, %xmm1 17797; SSE3-NEXT: psrlw $4, %xmm1 17798; SSE3-NEXT: paddb %xmm0, %xmm1 17799; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17800; SSE3-NEXT: pxor %xmm0, %xmm0 17801; SSE3-NEXT: psadbw %xmm1, %xmm0 17802; SSE3-NEXT: por {{.*}}(%rip), %xmm0 17803; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 17804; SSE3-NEXT: movdqa %xmm1, %xmm2 17805; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 17806; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17807; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 17808; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17809; SSE3-NEXT: pand %xmm3, %xmm1 17810; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17811; SSE3-NEXT: por %xmm1, %xmm0 17812; SSE3-NEXT: retq 17813; 17814; SSSE3-LABEL: ult_4_v2i64: 17815; SSSE3: # %bb.0: 17816; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17817; SSSE3-NEXT: movdqa %xmm0, %xmm2 17818; SSSE3-NEXT: pand %xmm1, %xmm2 17819; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17820; SSSE3-NEXT: movdqa %xmm3, %xmm4 17821; SSSE3-NEXT: pshufb %xmm2, %xmm4 17822; SSSE3-NEXT: psrlw $4, %xmm0 17823; SSSE3-NEXT: pand %xmm1, %xmm0 17824; SSSE3-NEXT: pshufb %xmm0, %xmm3 17825; SSSE3-NEXT: paddb %xmm4, %xmm3 17826; SSSE3-NEXT: pxor %xmm0, %xmm0 17827; SSSE3-NEXT: psadbw %xmm3, %xmm0 17828; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 17829; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 17830; SSSE3-NEXT: movdqa %xmm1, %xmm2 17831; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 17832; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17833; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 17834; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17835; SSSE3-NEXT: pand %xmm3, %xmm1 17836; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17837; SSSE3-NEXT: por %xmm1, %xmm0 17838; SSSE3-NEXT: retq 17839; 17840; SSE41-LABEL: ult_4_v2i64: 17841; SSE41: # %bb.0: 17842; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17843; SSE41-NEXT: movdqa %xmm0, %xmm2 17844; SSE41-NEXT: pand %xmm1, %xmm2 17845; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17846; SSE41-NEXT: movdqa %xmm3, %xmm4 17847; SSE41-NEXT: pshufb %xmm2, %xmm4 17848; SSE41-NEXT: psrlw $4, %xmm0 17849; SSE41-NEXT: pand %xmm1, %xmm0 17850; SSE41-NEXT: pshufb %xmm0, %xmm3 17851; SSE41-NEXT: paddb %xmm4, %xmm3 17852; SSE41-NEXT: pxor %xmm0, %xmm0 17853; SSE41-NEXT: psadbw %xmm3, %xmm0 17854; SSE41-NEXT: por {{.*}}(%rip), %xmm0 17855; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 17856; SSE41-NEXT: movdqa %xmm1, %xmm2 17857; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 17858; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17859; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 17860; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17861; SSE41-NEXT: pand %xmm3, %xmm1 17862; 
SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17863; SSE41-NEXT: por %xmm1, %xmm0 17864; SSE41-NEXT: retq 17865; 17866; AVX1-LABEL: ult_4_v2i64: 17867; AVX1: # %bb.0: 17868; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17869; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 17870; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17871; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17872; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 17873; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 17874; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17875; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 17876; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 17877; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17878; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] 17879; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17880; AVX1-NEXT: retq 17881; 17882; AVX2-LABEL: ult_4_v2i64: 17883; AVX2: # %bb.0: 17884; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 17885; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 17886; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 17887; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 17888; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 17889; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 17890; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 17891; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 17892; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 17893; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17894; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] 17895; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17896; AVX2-NEXT: retq 17897; 17898; AVX512VPOPCNTDQ-LABEL: ult_4_v2i64: 17899; AVX512VPOPCNTDQ: # %bb.0: 17900; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17901; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 17902; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] 17903; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17904; AVX512VPOPCNTDQ-NEXT: vzeroupper 17905; AVX512VPOPCNTDQ-NEXT: retq 17906; 17907; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64: 17908; AVX512VPOPCNTDQVL: # %bb.0: 
17909; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 17910; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 17911; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17912; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17913; AVX512VPOPCNTDQVL-NEXT: retq 17914; 17915; BITALG_NOVLX-LABEL: ult_4_v2i64: 17916; BITALG_NOVLX: # %bb.0: 17917; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17918; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 17919; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 17920; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17921; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] 17922; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 17923; BITALG_NOVLX-NEXT: vzeroupper 17924; BITALG_NOVLX-NEXT: retq 17925; 17926; BITALG-LABEL: ult_4_v2i64: 17927; BITALG: # %bb.0: 17928; BITALG-NEXT: vpopcntb %xmm0, %xmm0 17929; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 17930; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 17931; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 17932; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 17933; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 17934; BITALG-NEXT: retq 17935 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 17936 %3 = icmp ult <2 x i64> %2, <i64 4, i64 4> 17937 %4 = sext <2 x i1> %3 to <2 x i64> 17938 ret <2 x i64> %4 17939} 17940 17941define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { 17942; SSE2-LABEL: ugt_4_v2i64: 17943; SSE2: # %bb.0: 17944; SSE2-NEXT: movdqa %xmm0, %xmm1 17945; SSE2-NEXT: psrlw $1, %xmm1 17946; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 17947; SSE2-NEXT: psubb %xmm1, %xmm0 17948; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17949; SSE2-NEXT: movdqa %xmm0, %xmm2 17950; SSE2-NEXT: pand %xmm1, %xmm2 17951; SSE2-NEXT: psrlw $2, %xmm0 17952; SSE2-NEXT: pand %xmm1, %xmm0 17953; SSE2-NEXT: paddb %xmm2, %xmm0 17954; SSE2-NEXT: movdqa %xmm0, %xmm1 17955; SSE2-NEXT: psrlw $4, %xmm1 17956; SSE2-NEXT: paddb %xmm0, %xmm1 17957; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 17958; SSE2-NEXT: pxor %xmm0, %xmm0 17959; SSE2-NEXT: psadbw %xmm1, %xmm0 17960; SSE2-NEXT: por {{.*}}(%rip), %xmm0 17961; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 17962; SSE2-NEXT: movdqa %xmm0, %xmm2 17963; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 17964; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17965; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 17966; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17967; SSE2-NEXT: pand %xmm3, %xmm1 17968; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17969; SSE2-NEXT: por %xmm1, %xmm0 17970; SSE2-NEXT: retq 17971; 17972; SSE3-LABEL: ugt_4_v2i64: 17973; SSE3: # %bb.0: 17974; SSE3-NEXT: movdqa %xmm0, %xmm1 17975; SSE3-NEXT: psrlw $1, %xmm1 17976; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17977; SSE3-NEXT: psubb %xmm1, %xmm0 17978; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 17979; SSE3-NEXT: movdqa %xmm0, %xmm2 17980; SSE3-NEXT: pand %xmm1, %xmm2 17981; SSE3-NEXT: psrlw $2, %xmm0 17982; SSE3-NEXT: pand %xmm1, %xmm0 17983; SSE3-NEXT: paddb %xmm2, %xmm0 17984; SSE3-NEXT: movdqa %xmm0, %xmm1 17985; SSE3-NEXT: psrlw $4, %xmm1 17986; SSE3-NEXT: paddb %xmm0, %xmm1 17987; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 17988; SSE3-NEXT: pxor %xmm0, %xmm0 17989; SSE3-NEXT: psadbw %xmm1, %xmm0 17990; SSE3-NEXT: por {{.*}}(%rip), %xmm0 17991; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 17992; SSE3-NEXT: movdqa %xmm0, %xmm2 17993; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 17994; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 17995; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 17996; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 17997; SSE3-NEXT: pand %xmm3, %xmm1 17998; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 17999; SSE3-NEXT: por %xmm1, %xmm0 18000; SSE3-NEXT: retq 18001; 18002; SSSE3-LABEL: ugt_4_v2i64: 18003; SSSE3: # %bb.0: 18004; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18005; SSSE3-NEXT: movdqa %xmm0, %xmm2 18006; SSSE3-NEXT: pand %xmm1, %xmm2 
18007; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18008; SSSE3-NEXT: movdqa %xmm3, %xmm4 18009; SSSE3-NEXT: pshufb %xmm2, %xmm4 18010; SSSE3-NEXT: psrlw $4, %xmm0 18011; SSSE3-NEXT: pand %xmm1, %xmm0 18012; SSSE3-NEXT: pshufb %xmm0, %xmm3 18013; SSSE3-NEXT: paddb %xmm4, %xmm3 18014; SSSE3-NEXT: pxor %xmm0, %xmm0 18015; SSSE3-NEXT: psadbw %xmm3, %xmm0 18016; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 18017; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 18018; SSSE3-NEXT: movdqa %xmm0, %xmm2 18019; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 18020; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18021; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 18022; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18023; SSSE3-NEXT: pand %xmm3, %xmm1 18024; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18025; SSSE3-NEXT: por %xmm1, %xmm0 18026; SSSE3-NEXT: retq 18027; 18028; SSE41-LABEL: ugt_4_v2i64: 18029; SSE41: # %bb.0: 18030; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18031; SSE41-NEXT: movdqa %xmm0, %xmm2 18032; SSE41-NEXT: pand %xmm1, %xmm2 18033; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18034; SSE41-NEXT: movdqa %xmm3, %xmm4 18035; SSE41-NEXT: pshufb %xmm2, %xmm4 18036; SSE41-NEXT: psrlw $4, %xmm0 18037; SSE41-NEXT: pand %xmm1, %xmm0 18038; SSE41-NEXT: pshufb %xmm0, %xmm3 18039; SSE41-NEXT: paddb %xmm4, %xmm3 18040; SSE41-NEXT: pxor %xmm0, %xmm0 18041; SSE41-NEXT: psadbw %xmm3, %xmm0 18042; SSE41-NEXT: por {{.*}}(%rip), %xmm0 18043; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] 18044; SSE41-NEXT: movdqa %xmm0, %xmm2 18045; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 18046; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18047; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 18048; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18049; SSE41-NEXT: pand %xmm3, %xmm1 18050; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18051; SSE41-NEXT: por %xmm1, %xmm0 18052; SSE41-NEXT: retq 18053; 18054; 
AVX1-LABEL: ugt_4_v2i64: 18055; AVX1: # %bb.0: 18056; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18057; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 18058; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18059; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18060; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 18061; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 18062; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18063; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18064; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 18065; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18066; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18067; AVX1-NEXT: retq 18068; 18069; AVX2-LABEL: ugt_4_v2i64: 18070; AVX2: # %bb.0: 18071; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18072; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 18073; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18074; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18075; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 18076; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 18077; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18078; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18079; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 18080; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18081; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18082; AVX2-NEXT: retq 18083; 18084; AVX512VPOPCNTDQ-LABEL: ugt_4_v2i64: 18085; AVX512VPOPCNTDQ: # %bb.0: 18086; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18087; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 18088; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18089; AVX512VPOPCNTDQ-NEXT: vzeroupper 18090; AVX512VPOPCNTDQ-NEXT: retq 18091; 18092; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64: 18093; AVX512VPOPCNTDQVL: # %bb.0: 18094; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 18095; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 18096; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18097; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18098; 
AVX512VPOPCNTDQVL-NEXT: retq 18099; 18100; BITALG_NOVLX-LABEL: ugt_4_v2i64: 18101; BITALG_NOVLX: # %bb.0: 18102; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18103; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 18104; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 18105; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18106; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18107; BITALG_NOVLX-NEXT: vzeroupper 18108; BITALG_NOVLX-NEXT: retq 18109; 18110; BITALG-LABEL: ugt_4_v2i64: 18111; BITALG: # %bb.0: 18112; BITALG-NEXT: vpopcntb %xmm0, %xmm0 18113; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 18114; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18115; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 18116; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18117; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18118; BITALG-NEXT: retq 18119 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 18120 %3 = icmp ugt <2 x i64> %2, <i64 4, i64 4> 18121 %4 = sext <2 x i1> %3 to <2 x i64> 18122 ret <2 x i64> %4 18123} 18124 18125define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { 18126; SSE2-LABEL: ult_5_v2i64: 18127; SSE2: # %bb.0: 18128; SSE2-NEXT: movdqa %xmm0, %xmm1 18129; SSE2-NEXT: psrlw $1, %xmm1 18130; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18131; SSE2-NEXT: psubb %xmm1, %xmm0 18132; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18133; SSE2-NEXT: movdqa %xmm0, %xmm2 18134; SSE2-NEXT: pand %xmm1, %xmm2 18135; SSE2-NEXT: psrlw $2, %xmm0 18136; SSE2-NEXT: pand %xmm1, %xmm0 18137; SSE2-NEXT: paddb %xmm2, %xmm0 18138; SSE2-NEXT: movdqa %xmm0, %xmm1 18139; SSE2-NEXT: psrlw $4, %xmm1 18140; SSE2-NEXT: paddb %xmm0, %xmm1 18141; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18142; SSE2-NEXT: pxor %xmm0, %xmm0 18143; SSE2-NEXT: psadbw %xmm1, %xmm0 18144; SSE2-NEXT: por {{.*}}(%rip), %xmm0 18145; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18146; SSE2-NEXT: movdqa %xmm1, %xmm2 18147; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 18148; SSE2-NEXT: pshufd 
{{.*#+}} xmm3 = xmm2[0,0,2,2] 18149; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 18150; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18151; SSE2-NEXT: pand %xmm3, %xmm1 18152; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18153; SSE2-NEXT: por %xmm1, %xmm0 18154; SSE2-NEXT: retq 18155; 18156; SSE3-LABEL: ult_5_v2i64: 18157; SSE3: # %bb.0: 18158; SSE3-NEXT: movdqa %xmm0, %xmm1 18159; SSE3-NEXT: psrlw $1, %xmm1 18160; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18161; SSE3-NEXT: psubb %xmm1, %xmm0 18162; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18163; SSE3-NEXT: movdqa %xmm0, %xmm2 18164; SSE3-NEXT: pand %xmm1, %xmm2 18165; SSE3-NEXT: psrlw $2, %xmm0 18166; SSE3-NEXT: pand %xmm1, %xmm0 18167; SSE3-NEXT: paddb %xmm2, %xmm0 18168; SSE3-NEXT: movdqa %xmm0, %xmm1 18169; SSE3-NEXT: psrlw $4, %xmm1 18170; SSE3-NEXT: paddb %xmm0, %xmm1 18171; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18172; SSE3-NEXT: pxor %xmm0, %xmm0 18173; SSE3-NEXT: psadbw %xmm1, %xmm0 18174; SSE3-NEXT: por {{.*}}(%rip), %xmm0 18175; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18176; SSE3-NEXT: movdqa %xmm1, %xmm2 18177; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 18178; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18179; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 18180; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18181; SSE3-NEXT: pand %xmm3, %xmm1 18182; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18183; SSE3-NEXT: por %xmm1, %xmm0 18184; SSE3-NEXT: retq 18185; 18186; SSSE3-LABEL: ult_5_v2i64: 18187; SSSE3: # %bb.0: 18188; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18189; SSSE3-NEXT: movdqa %xmm0, %xmm2 18190; SSSE3-NEXT: pand %xmm1, %xmm2 18191; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18192; SSSE3-NEXT: movdqa %xmm3, %xmm4 18193; SSSE3-NEXT: pshufb %xmm2, %xmm4 18194; SSSE3-NEXT: psrlw $4, %xmm0 18195; SSSE3-NEXT: pand %xmm1, %xmm0 18196; SSSE3-NEXT: pshufb %xmm0, %xmm3 18197; SSSE3-NEXT: paddb %xmm4, %xmm3 
18198; SSSE3-NEXT: pxor %xmm0, %xmm0 18199; SSSE3-NEXT: psadbw %xmm3, %xmm0 18200; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 18201; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18202; SSSE3-NEXT: movdqa %xmm1, %xmm2 18203; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 18204; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18205; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 18206; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18207; SSSE3-NEXT: pand %xmm3, %xmm1 18208; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18209; SSSE3-NEXT: por %xmm1, %xmm0 18210; SSSE3-NEXT: retq 18211; 18212; SSE41-LABEL: ult_5_v2i64: 18213; SSE41: # %bb.0: 18214; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18215; SSE41-NEXT: movdqa %xmm0, %xmm2 18216; SSE41-NEXT: pand %xmm1, %xmm2 18217; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18218; SSE41-NEXT: movdqa %xmm3, %xmm4 18219; SSE41-NEXT: pshufb %xmm2, %xmm4 18220; SSE41-NEXT: psrlw $4, %xmm0 18221; SSE41-NEXT: pand %xmm1, %xmm0 18222; SSE41-NEXT: pshufb %xmm0, %xmm3 18223; SSE41-NEXT: paddb %xmm4, %xmm3 18224; SSE41-NEXT: pxor %xmm0, %xmm0 18225; SSE41-NEXT: psadbw %xmm3, %xmm0 18226; SSE41-NEXT: por {{.*}}(%rip), %xmm0 18227; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18228; SSE41-NEXT: movdqa %xmm1, %xmm2 18229; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 18230; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18231; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 18232; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18233; SSE41-NEXT: pand %xmm3, %xmm1 18234; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18235; SSE41-NEXT: por %xmm1, %xmm0 18236; SSE41-NEXT: retq 18237; 18238; AVX1-LABEL: ult_5_v2i64: 18239; AVX1: # %bb.0: 18240; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18241; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 18242; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18243; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18244; 
AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 18245; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 18246; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18247; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18248; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 18249; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18250; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] 18251; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18252; AVX1-NEXT: retq 18253; 18254; AVX2-LABEL: ult_5_v2i64: 18255; AVX2: # %bb.0: 18256; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18257; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 18258; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18259; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18260; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 18261; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 18262; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18263; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18264; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 18265; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18266; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] 18267; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18268; AVX2-NEXT: retq 18269; 18270; AVX512VPOPCNTDQ-LABEL: ult_5_v2i64: 18271; AVX512VPOPCNTDQ: # %bb.0: 18272; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18273; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 18274; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] 18275; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18276; AVX512VPOPCNTDQ-NEXT: vzeroupper 18277; AVX512VPOPCNTDQ-NEXT: retq 18278; 18279; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64: 18280; AVX512VPOPCNTDQVL: # %bb.0: 18281; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 18282; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 18283; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18284; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18285; AVX512VPOPCNTDQVL-NEXT: retq 18286; 18287; BITALG_NOVLX-LABEL: ult_5_v2i64: 18288; BITALG_NOVLX: # %bb.0: 18289; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18290; 
BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 18291; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 18292; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18293; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] 18294; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18295; BITALG_NOVLX-NEXT: vzeroupper 18296; BITALG_NOVLX-NEXT: retq 18297; 18298; BITALG-LABEL: ult_5_v2i64: 18299; BITALG: # %bb.0: 18300; BITALG-NEXT: vpopcntb %xmm0, %xmm0 18301; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 18302; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18303; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 18304; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18305; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18306; BITALG-NEXT: retq 18307 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 18308 %3 = icmp ult <2 x i64> %2, <i64 5, i64 5> 18309 %4 = sext <2 x i1> %3 to <2 x i64> 18310 ret <2 x i64> %4 18311} 18312 18313define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { 18314; SSE2-LABEL: ugt_5_v2i64: 18315; SSE2: # %bb.0: 18316; SSE2-NEXT: movdqa %xmm0, %xmm1 18317; SSE2-NEXT: psrlw $1, %xmm1 18318; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18319; SSE2-NEXT: psubb %xmm1, %xmm0 18320; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18321; SSE2-NEXT: movdqa %xmm0, %xmm2 18322; SSE2-NEXT: pand %xmm1, %xmm2 18323; SSE2-NEXT: psrlw $2, %xmm0 18324; SSE2-NEXT: pand %xmm1, %xmm0 18325; SSE2-NEXT: paddb %xmm2, %xmm0 18326; SSE2-NEXT: movdqa %xmm0, %xmm1 18327; SSE2-NEXT: psrlw $4, %xmm1 18328; SSE2-NEXT: paddb %xmm0, %xmm1 18329; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18330; SSE2-NEXT: pxor %xmm0, %xmm0 18331; SSE2-NEXT: psadbw %xmm1, %xmm0 18332; SSE2-NEXT: por {{.*}}(%rip), %xmm0 18333; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18334; SSE2-NEXT: movdqa %xmm0, %xmm2 18335; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 18336; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18337; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 18338; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18339; 
SSE2-NEXT: pand %xmm3, %xmm1 18340; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18341; SSE2-NEXT: por %xmm1, %xmm0 18342; SSE2-NEXT: retq 18343; 18344; SSE3-LABEL: ugt_5_v2i64: 18345; SSE3: # %bb.0: 18346; SSE3-NEXT: movdqa %xmm0, %xmm1 18347; SSE3-NEXT: psrlw $1, %xmm1 18348; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18349; SSE3-NEXT: psubb %xmm1, %xmm0 18350; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18351; SSE3-NEXT: movdqa %xmm0, %xmm2 18352; SSE3-NEXT: pand %xmm1, %xmm2 18353; SSE3-NEXT: psrlw $2, %xmm0 18354; SSE3-NEXT: pand %xmm1, %xmm0 18355; SSE3-NEXT: paddb %xmm2, %xmm0 18356; SSE3-NEXT: movdqa %xmm0, %xmm1 18357; SSE3-NEXT: psrlw $4, %xmm1 18358; SSE3-NEXT: paddb %xmm0, %xmm1 18359; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18360; SSE3-NEXT: pxor %xmm0, %xmm0 18361; SSE3-NEXT: psadbw %xmm1, %xmm0 18362; SSE3-NEXT: por {{.*}}(%rip), %xmm0 18363; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18364; SSE3-NEXT: movdqa %xmm0, %xmm2 18365; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 18366; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18367; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 18368; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18369; SSE3-NEXT: pand %xmm3, %xmm1 18370; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18371; SSE3-NEXT: por %xmm1, %xmm0 18372; SSE3-NEXT: retq 18373; 18374; SSSE3-LABEL: ugt_5_v2i64: 18375; SSSE3: # %bb.0: 18376; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18377; SSSE3-NEXT: movdqa %xmm0, %xmm2 18378; SSSE3-NEXT: pand %xmm1, %xmm2 18379; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18380; SSSE3-NEXT: movdqa %xmm3, %xmm4 18381; SSSE3-NEXT: pshufb %xmm2, %xmm4 18382; SSSE3-NEXT: psrlw $4, %xmm0 18383; SSSE3-NEXT: pand %xmm1, %xmm0 18384; SSSE3-NEXT: pshufb %xmm0, %xmm3 18385; SSSE3-NEXT: paddb %xmm4, %xmm3 18386; SSSE3-NEXT: pxor %xmm0, %xmm0 18387; SSSE3-NEXT: psadbw %xmm3, %xmm0 18388; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 18389; 
SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18390; SSSE3-NEXT: movdqa %xmm0, %xmm2 18391; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 18392; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18393; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 18394; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18395; SSSE3-NEXT: pand %xmm3, %xmm1 18396; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18397; SSSE3-NEXT: por %xmm1, %xmm0 18398; SSSE3-NEXT: retq 18399; 18400; SSE41-LABEL: ugt_5_v2i64: 18401; SSE41: # %bb.0: 18402; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18403; SSE41-NEXT: movdqa %xmm0, %xmm2 18404; SSE41-NEXT: pand %xmm1, %xmm2 18405; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18406; SSE41-NEXT: movdqa %xmm3, %xmm4 18407; SSE41-NEXT: pshufb %xmm2, %xmm4 18408; SSE41-NEXT: psrlw $4, %xmm0 18409; SSE41-NEXT: pand %xmm1, %xmm0 18410; SSE41-NEXT: pshufb %xmm0, %xmm3 18411; SSE41-NEXT: paddb %xmm4, %xmm3 18412; SSE41-NEXT: pxor %xmm0, %xmm0 18413; SSE41-NEXT: psadbw %xmm3, %xmm0 18414; SSE41-NEXT: por {{.*}}(%rip), %xmm0 18415; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] 18416; SSE41-NEXT: movdqa %xmm0, %xmm2 18417; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 18418; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18419; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 18420; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18421; SSE41-NEXT: pand %xmm3, %xmm1 18422; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18423; SSE41-NEXT: por %xmm1, %xmm0 18424; SSE41-NEXT: retq 18425; 18426; AVX1-LABEL: ugt_5_v2i64: 18427; AVX1: # %bb.0: 18428; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18429; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 18430; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18431; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18432; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 18433; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 18434; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 
18435; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18436; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 18437; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18438; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18439; AVX1-NEXT: retq 18440; 18441; AVX2-LABEL: ugt_5_v2i64: 18442; AVX2: # %bb.0: 18443; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18444; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 18445; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18446; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18447; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 18448; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 18449; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18450; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18451; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 18452; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18453; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18454; AVX2-NEXT: retq 18455; 18456; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64: 18457; AVX512VPOPCNTDQ: # %bb.0: 18458; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18459; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 18460; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18461; AVX512VPOPCNTDQ-NEXT: vzeroupper 18462; AVX512VPOPCNTDQ-NEXT: retq 18463; 18464; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64: 18465; AVX512VPOPCNTDQVL: # %bb.0: 18466; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 18467; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 18468; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18469; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18470; AVX512VPOPCNTDQVL-NEXT: retq 18471; 18472; BITALG_NOVLX-LABEL: ugt_5_v2i64: 18473; BITALG_NOVLX: # %bb.0: 18474; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18475; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 18476; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 18477; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18478; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18479; BITALG_NOVLX-NEXT: vzeroupper 18480; 
BITALG_NOVLX-NEXT: retq 18481; 18482; BITALG-LABEL: ugt_5_v2i64: 18483; BITALG: # %bb.0: 18484; BITALG-NEXT: vpopcntb %xmm0, %xmm0 18485; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 18486; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18487; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 18488; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18489; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18490; BITALG-NEXT: retq 18491 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 18492 %3 = icmp ugt <2 x i64> %2, <i64 5, i64 5> 18493 %4 = sext <2 x i1> %3 to <2 x i64> 18494 ret <2 x i64> %4 18495} 18496 18497define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { 18498; SSE2-LABEL: ult_6_v2i64: 18499; SSE2: # %bb.0: 18500; SSE2-NEXT: movdqa %xmm0, %xmm1 18501; SSE2-NEXT: psrlw $1, %xmm1 18502; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18503; SSE2-NEXT: psubb %xmm1, %xmm0 18504; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18505; SSE2-NEXT: movdqa %xmm0, %xmm2 18506; SSE2-NEXT: pand %xmm1, %xmm2 18507; SSE2-NEXT: psrlw $2, %xmm0 18508; SSE2-NEXT: pand %xmm1, %xmm0 18509; SSE2-NEXT: paddb %xmm2, %xmm0 18510; SSE2-NEXT: movdqa %xmm0, %xmm1 18511; SSE2-NEXT: psrlw $4, %xmm1 18512; SSE2-NEXT: paddb %xmm0, %xmm1 18513; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18514; SSE2-NEXT: pxor %xmm0, %xmm0 18515; SSE2-NEXT: psadbw %xmm1, %xmm0 18516; SSE2-NEXT: por {{.*}}(%rip), %xmm0 18517; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18518; SSE2-NEXT: movdqa %xmm1, %xmm2 18519; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 18520; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18521; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 18522; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18523; SSE2-NEXT: pand %xmm3, %xmm1 18524; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18525; SSE2-NEXT: por %xmm1, %xmm0 18526; SSE2-NEXT: retq 18527; 18528; SSE3-LABEL: ult_6_v2i64: 18529; SSE3: # %bb.0: 18530; SSE3-NEXT: movdqa %xmm0, %xmm1 18531; SSE3-NEXT: psrlw $1, %xmm1 18532; SSE3-NEXT: pand 
{{.*}}(%rip), %xmm1 18533; SSE3-NEXT: psubb %xmm1, %xmm0 18534; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18535; SSE3-NEXT: movdqa %xmm0, %xmm2 18536; SSE3-NEXT: pand %xmm1, %xmm2 18537; SSE3-NEXT: psrlw $2, %xmm0 18538; SSE3-NEXT: pand %xmm1, %xmm0 18539; SSE3-NEXT: paddb %xmm2, %xmm0 18540; SSE3-NEXT: movdqa %xmm0, %xmm1 18541; SSE3-NEXT: psrlw $4, %xmm1 18542; SSE3-NEXT: paddb %xmm0, %xmm1 18543; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18544; SSE3-NEXT: pxor %xmm0, %xmm0 18545; SSE3-NEXT: psadbw %xmm1, %xmm0 18546; SSE3-NEXT: por {{.*}}(%rip), %xmm0 18547; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18548; SSE3-NEXT: movdqa %xmm1, %xmm2 18549; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 18550; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18551; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 18552; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18553; SSE3-NEXT: pand %xmm3, %xmm1 18554; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18555; SSE3-NEXT: por %xmm1, %xmm0 18556; SSE3-NEXT: retq 18557; 18558; SSSE3-LABEL: ult_6_v2i64: 18559; SSSE3: # %bb.0: 18560; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18561; SSSE3-NEXT: movdqa %xmm0, %xmm2 18562; SSSE3-NEXT: pand %xmm1, %xmm2 18563; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18564; SSSE3-NEXT: movdqa %xmm3, %xmm4 18565; SSSE3-NEXT: pshufb %xmm2, %xmm4 18566; SSSE3-NEXT: psrlw $4, %xmm0 18567; SSSE3-NEXT: pand %xmm1, %xmm0 18568; SSSE3-NEXT: pshufb %xmm0, %xmm3 18569; SSSE3-NEXT: paddb %xmm4, %xmm3 18570; SSSE3-NEXT: pxor %xmm0, %xmm0 18571; SSSE3-NEXT: psadbw %xmm3, %xmm0 18572; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 18573; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18574; SSSE3-NEXT: movdqa %xmm1, %xmm2 18575; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 18576; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18577; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 18578; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18579; 
SSSE3-NEXT: pand %xmm3, %xmm1 18580; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18581; SSSE3-NEXT: por %xmm1, %xmm0 18582; SSSE3-NEXT: retq 18583; 18584; SSE41-LABEL: ult_6_v2i64: 18585; SSE41: # %bb.0: 18586; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18587; SSE41-NEXT: movdqa %xmm0, %xmm2 18588; SSE41-NEXT: pand %xmm1, %xmm2 18589; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18590; SSE41-NEXT: movdqa %xmm3, %xmm4 18591; SSE41-NEXT: pshufb %xmm2, %xmm4 18592; SSE41-NEXT: psrlw $4, %xmm0 18593; SSE41-NEXT: pand %xmm1, %xmm0 18594; SSE41-NEXT: pshufb %xmm0, %xmm3 18595; SSE41-NEXT: paddb %xmm4, %xmm3 18596; SSE41-NEXT: pxor %xmm0, %xmm0 18597; SSE41-NEXT: psadbw %xmm3, %xmm0 18598; SSE41-NEXT: por {{.*}}(%rip), %xmm0 18599; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18600; SSE41-NEXT: movdqa %xmm1, %xmm2 18601; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 18602; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18603; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 18604; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18605; SSE41-NEXT: pand %xmm3, %xmm1 18606; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18607; SSE41-NEXT: por %xmm1, %xmm0 18608; SSE41-NEXT: retq 18609; 18610; AVX1-LABEL: ult_6_v2i64: 18611; AVX1: # %bb.0: 18612; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18613; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 18614; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18615; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18616; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 18617; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 18618; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18619; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18620; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 18621; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18622; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] 18623; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18624; AVX1-NEXT: retq 18625; 18626; AVX2-LABEL: ult_6_v2i64: 18627; 
AVX2: # %bb.0: 18628; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18629; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 18630; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18631; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18632; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 18633; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 18634; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18635; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18636; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 18637; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18638; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] 18639; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18640; AVX2-NEXT: retq 18641; 18642; AVX512VPOPCNTDQ-LABEL: ult_6_v2i64: 18643; AVX512VPOPCNTDQ: # %bb.0: 18644; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18645; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 18646; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] 18647; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18648; AVX512VPOPCNTDQ-NEXT: vzeroupper 18649; AVX512VPOPCNTDQ-NEXT: retq 18650; 18651; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64: 18652; AVX512VPOPCNTDQVL: # %bb.0: 18653; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 18654; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 18655; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18656; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18657; AVX512VPOPCNTDQVL-NEXT: retq 18658; 18659; BITALG_NOVLX-LABEL: ult_6_v2i64: 18660; BITALG_NOVLX: # %bb.0: 18661; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18662; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 18663; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 18664; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18665; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] 18666; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18667; BITALG_NOVLX-NEXT: vzeroupper 18668; BITALG_NOVLX-NEXT: retq 18669; 18670; BITALG-LABEL: ult_6_v2i64: 18671; BITALG: # %bb.0: 18672; BITALG-NEXT: vpopcntb %xmm0, 
%xmm0 18673; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 18674; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18675; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 18676; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18677; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18678; BITALG-NEXT: retq 18679 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 18680 %3 = icmp ult <2 x i64> %2, <i64 6, i64 6> 18681 %4 = sext <2 x i1> %3 to <2 x i64> 18682 ret <2 x i64> %4 18683} 18684 18685define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { 18686; SSE2-LABEL: ugt_6_v2i64: 18687; SSE2: # %bb.0: 18688; SSE2-NEXT: movdqa %xmm0, %xmm1 18689; SSE2-NEXT: psrlw $1, %xmm1 18690; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18691; SSE2-NEXT: psubb %xmm1, %xmm0 18692; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18693; SSE2-NEXT: movdqa %xmm0, %xmm2 18694; SSE2-NEXT: pand %xmm1, %xmm2 18695; SSE2-NEXT: psrlw $2, %xmm0 18696; SSE2-NEXT: pand %xmm1, %xmm0 18697; SSE2-NEXT: paddb %xmm2, %xmm0 18698; SSE2-NEXT: movdqa %xmm0, %xmm1 18699; SSE2-NEXT: psrlw $4, %xmm1 18700; SSE2-NEXT: paddb %xmm0, %xmm1 18701; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18702; SSE2-NEXT: pxor %xmm0, %xmm0 18703; SSE2-NEXT: psadbw %xmm1, %xmm0 18704; SSE2-NEXT: por {{.*}}(%rip), %xmm0 18705; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18706; SSE2-NEXT: movdqa %xmm0, %xmm2 18707; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 18708; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18709; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 18710; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18711; SSE2-NEXT: pand %xmm3, %xmm1 18712; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18713; SSE2-NEXT: por %xmm1, %xmm0 18714; SSE2-NEXT: retq 18715; 18716; SSE3-LABEL: ugt_6_v2i64: 18717; SSE3: # %bb.0: 18718; SSE3-NEXT: movdqa %xmm0, %xmm1 18719; SSE3-NEXT: psrlw $1, %xmm1 18720; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18721; SSE3-NEXT: psubb %xmm1, %xmm0 18722; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18723; SSE3-NEXT: movdqa %xmm0, %xmm2 18724; SSE3-NEXT: pand %xmm1, %xmm2 18725; SSE3-NEXT: psrlw $2, %xmm0 18726; SSE3-NEXT: pand %xmm1, %xmm0 18727; SSE3-NEXT: paddb %xmm2, %xmm0 18728; SSE3-NEXT: movdqa %xmm0, %xmm1 18729; SSE3-NEXT: psrlw $4, %xmm1 18730; SSE3-NEXT: paddb %xmm0, %xmm1 18731; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18732; SSE3-NEXT: pxor %xmm0, %xmm0 18733; SSE3-NEXT: psadbw %xmm1, %xmm0 18734; SSE3-NEXT: por {{.*}}(%rip), %xmm0 18735; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18736; SSE3-NEXT: movdqa %xmm0, %xmm2 18737; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 18738; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18739; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 18740; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18741; SSE3-NEXT: pand %xmm3, %xmm1 18742; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18743; SSE3-NEXT: por %xmm1, %xmm0 18744; SSE3-NEXT: retq 18745; 18746; SSSE3-LABEL: ugt_6_v2i64: 18747; SSSE3: # %bb.0: 18748; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18749; SSSE3-NEXT: movdqa %xmm0, %xmm2 18750; SSSE3-NEXT: pand %xmm1, %xmm2 18751; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18752; SSSE3-NEXT: movdqa %xmm3, %xmm4 18753; SSSE3-NEXT: pshufb %xmm2, %xmm4 18754; SSSE3-NEXT: psrlw $4, %xmm0 18755; SSSE3-NEXT: pand %xmm1, %xmm0 18756; SSSE3-NEXT: pshufb %xmm0, %xmm3 18757; SSSE3-NEXT: paddb %xmm4, %xmm3 18758; SSSE3-NEXT: pxor %xmm0, %xmm0 18759; SSSE3-NEXT: psadbw %xmm3, %xmm0 18760; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 18761; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18762; SSSE3-NEXT: movdqa %xmm0, %xmm2 18763; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 18764; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18765; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 18766; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18767; SSSE3-NEXT: pand %xmm3, %xmm1 18768; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18769; 
SSSE3-NEXT: por %xmm1, %xmm0 18770; SSSE3-NEXT: retq 18771; 18772; SSE41-LABEL: ugt_6_v2i64: 18773; SSE41: # %bb.0: 18774; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18775; SSE41-NEXT: movdqa %xmm0, %xmm2 18776; SSE41-NEXT: pand %xmm1, %xmm2 18777; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18778; SSE41-NEXT: movdqa %xmm3, %xmm4 18779; SSE41-NEXT: pshufb %xmm2, %xmm4 18780; SSE41-NEXT: psrlw $4, %xmm0 18781; SSE41-NEXT: pand %xmm1, %xmm0 18782; SSE41-NEXT: pshufb %xmm0, %xmm3 18783; SSE41-NEXT: paddb %xmm4, %xmm3 18784; SSE41-NEXT: pxor %xmm0, %xmm0 18785; SSE41-NEXT: psadbw %xmm3, %xmm0 18786; SSE41-NEXT: por {{.*}}(%rip), %xmm0 18787; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] 18788; SSE41-NEXT: movdqa %xmm0, %xmm2 18789; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 18790; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18791; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 18792; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18793; SSE41-NEXT: pand %xmm3, %xmm1 18794; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18795; SSE41-NEXT: por %xmm1, %xmm0 18796; SSE41-NEXT: retq 18797; 18798; AVX1-LABEL: ugt_6_v2i64: 18799; AVX1: # %bb.0: 18800; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18801; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 18802; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18803; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18804; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 18805; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 18806; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18807; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18808; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 18809; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18810; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18811; AVX1-NEXT: retq 18812; 18813; AVX2-LABEL: ugt_6_v2i64: 18814; AVX2: # %bb.0: 18815; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18816; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 18817; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18818; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18819; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 18820; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 18821; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18822; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18823; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 18824; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18825; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18826; AVX2-NEXT: retq 18827; 18828; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64: 18829; AVX512VPOPCNTDQ: # %bb.0: 18830; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18831; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 18832; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18833; AVX512VPOPCNTDQ-NEXT: vzeroupper 18834; AVX512VPOPCNTDQ-NEXT: retq 18835; 18836; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64: 18837; AVX512VPOPCNTDQVL: # %bb.0: 18838; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 18839; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 18840; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18841; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18842; AVX512VPOPCNTDQVL-NEXT: retq 18843; 18844; BITALG_NOVLX-LABEL: ugt_6_v2i64: 18845; BITALG_NOVLX: # %bb.0: 18846; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18847; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 18848; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 18849; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18850; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 18851; BITALG_NOVLX-NEXT: vzeroupper 18852; BITALG_NOVLX-NEXT: retq 18853; 18854; BITALG-LABEL: ugt_6_v2i64: 18855; BITALG: # %bb.0: 18856; BITALG-NEXT: vpopcntb %xmm0, %xmm0 18857; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 18858; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18859; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 18860; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 18861; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 18862; 
BITALG-NEXT: retq 18863 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 18864 %3 = icmp ugt <2 x i64> %2, <i64 6, i64 6> 18865 %4 = sext <2 x i1> %3 to <2 x i64> 18866 ret <2 x i64> %4 18867} 18868 18869define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { 18870; SSE2-LABEL: ult_7_v2i64: 18871; SSE2: # %bb.0: 18872; SSE2-NEXT: movdqa %xmm0, %xmm1 18873; SSE2-NEXT: psrlw $1, %xmm1 18874; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18875; SSE2-NEXT: psubb %xmm1, %xmm0 18876; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18877; SSE2-NEXT: movdqa %xmm0, %xmm2 18878; SSE2-NEXT: pand %xmm1, %xmm2 18879; SSE2-NEXT: psrlw $2, %xmm0 18880; SSE2-NEXT: pand %xmm1, %xmm0 18881; SSE2-NEXT: paddb %xmm2, %xmm0 18882; SSE2-NEXT: movdqa %xmm0, %xmm1 18883; SSE2-NEXT: psrlw $4, %xmm1 18884; SSE2-NEXT: paddb %xmm0, %xmm1 18885; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 18886; SSE2-NEXT: pxor %xmm0, %xmm0 18887; SSE2-NEXT: psadbw %xmm1, %xmm0 18888; SSE2-NEXT: por {{.*}}(%rip), %xmm0 18889; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 18890; SSE2-NEXT: movdqa %xmm1, %xmm2 18891; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 18892; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18893; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 18894; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18895; SSE2-NEXT: pand %xmm3, %xmm1 18896; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18897; SSE2-NEXT: por %xmm1, %xmm0 18898; SSE2-NEXT: retq 18899; 18900; SSE3-LABEL: ult_7_v2i64: 18901; SSE3: # %bb.0: 18902; SSE3-NEXT: movdqa %xmm0, %xmm1 18903; SSE3-NEXT: psrlw $1, %xmm1 18904; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18905; SSE3-NEXT: psubb %xmm1, %xmm0 18906; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 18907; SSE3-NEXT: movdqa %xmm0, %xmm2 18908; SSE3-NEXT: pand %xmm1, %xmm2 18909; SSE3-NEXT: psrlw $2, %xmm0 18910; SSE3-NEXT: pand %xmm1, %xmm0 18911; SSE3-NEXT: paddb %xmm2, %xmm0 18912; SSE3-NEXT: movdqa %xmm0, %xmm1 18913; SSE3-NEXT: psrlw $4, 
%xmm1 18914; SSE3-NEXT: paddb %xmm0, %xmm1 18915; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 18916; SSE3-NEXT: pxor %xmm0, %xmm0 18917; SSE3-NEXT: psadbw %xmm1, %xmm0 18918; SSE3-NEXT: por {{.*}}(%rip), %xmm0 18919; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 18920; SSE3-NEXT: movdqa %xmm1, %xmm2 18921; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 18922; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18923; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 18924; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18925; SSE3-NEXT: pand %xmm3, %xmm1 18926; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18927; SSE3-NEXT: por %xmm1, %xmm0 18928; SSE3-NEXT: retq 18929; 18930; SSSE3-LABEL: ult_7_v2i64: 18931; SSSE3: # %bb.0: 18932; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18933; SSSE3-NEXT: movdqa %xmm0, %xmm2 18934; SSSE3-NEXT: pand %xmm1, %xmm2 18935; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18936; SSSE3-NEXT: movdqa %xmm3, %xmm4 18937; SSSE3-NEXT: pshufb %xmm2, %xmm4 18938; SSSE3-NEXT: psrlw $4, %xmm0 18939; SSSE3-NEXT: pand %xmm1, %xmm0 18940; SSSE3-NEXT: pshufb %xmm0, %xmm3 18941; SSSE3-NEXT: paddb %xmm4, %xmm3 18942; SSSE3-NEXT: pxor %xmm0, %xmm0 18943; SSSE3-NEXT: psadbw %xmm3, %xmm0 18944; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 18945; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 18946; SSSE3-NEXT: movdqa %xmm1, %xmm2 18947; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 18948; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18949; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 18950; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18951; SSSE3-NEXT: pand %xmm3, %xmm1 18952; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18953; SSSE3-NEXT: por %xmm1, %xmm0 18954; SSSE3-NEXT: retq 18955; 18956; SSE41-LABEL: ult_7_v2i64: 18957; SSE41: # %bb.0: 18958; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18959; SSE41-NEXT: movdqa %xmm0, %xmm2 18960; SSE41-NEXT: pand %xmm1, %xmm2 18961; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18962; SSE41-NEXT: movdqa %xmm3, %xmm4 18963; SSE41-NEXT: pshufb %xmm2, %xmm4 18964; SSE41-NEXT: psrlw $4, %xmm0 18965; SSE41-NEXT: pand %xmm1, %xmm0 18966; SSE41-NEXT: pshufb %xmm0, %xmm3 18967; SSE41-NEXT: paddb %xmm4, %xmm3 18968; SSE41-NEXT: pxor %xmm0, %xmm0 18969; SSE41-NEXT: psadbw %xmm3, %xmm0 18970; SSE41-NEXT: por {{.*}}(%rip), %xmm0 18971; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 18972; SSE41-NEXT: movdqa %xmm1, %xmm2 18973; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 18974; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 18975; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 18976; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 18977; SSE41-NEXT: pand %xmm3, %xmm1 18978; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 18979; SSE41-NEXT: por %xmm1, %xmm0 18980; SSE41-NEXT: retq 18981; 18982; AVX1-LABEL: ult_7_v2i64: 18983; AVX1: # %bb.0: 18984; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 18985; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 18986; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 18987; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 18988; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 18989; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 18990; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 18991; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 18992; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 18993; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 18994; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] 18995; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 18996; AVX1-NEXT: retq 18997; 18998; AVX2-LABEL: ult_7_v2i64: 18999; AVX2: # %bb.0: 19000; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19001; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 19002; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19003; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19004; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 19005; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 19006; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 19007; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19008; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 19009; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19010; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] 19011; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19012; AVX2-NEXT: retq 19013; 19014; AVX512VPOPCNTDQ-LABEL: ult_7_v2i64: 19015; AVX512VPOPCNTDQ: # %bb.0: 19016; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19017; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 19018; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] 19019; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19020; AVX512VPOPCNTDQ-NEXT: vzeroupper 19021; AVX512VPOPCNTDQ-NEXT: retq 19022; 19023; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64: 19024; AVX512VPOPCNTDQVL: # %bb.0: 19025; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 19026; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 19027; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19028; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19029; AVX512VPOPCNTDQVL-NEXT: retq 19030; 19031; BITALG_NOVLX-LABEL: ult_7_v2i64: 19032; BITALG_NOVLX: # %bb.0: 19033; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19034; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 19035; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 19036; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19037; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] 19038; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19039; BITALG_NOVLX-NEXT: vzeroupper 19040; BITALG_NOVLX-NEXT: retq 19041; 19042; BITALG-LABEL: ult_7_v2i64: 19043; BITALG: # %bb.0: 19044; BITALG-NEXT: vpopcntb %xmm0, %xmm0 19045; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 19046; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19047; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 19048; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19049; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19050; BITALG-NEXT: retq 19051 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 19052 %3 = icmp ult <2 x i64> %2, <i64 7, i64 7> 
19053 %4 = sext <2 x i1> %3 to <2 x i64> 19054 ret <2 x i64> %4 19055} 19056 19057define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { 19058; SSE2-LABEL: ugt_7_v2i64: 19059; SSE2: # %bb.0: 19060; SSE2-NEXT: movdqa %xmm0, %xmm1 19061; SSE2-NEXT: psrlw $1, %xmm1 19062; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19063; SSE2-NEXT: psubb %xmm1, %xmm0 19064; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19065; SSE2-NEXT: movdqa %xmm0, %xmm2 19066; SSE2-NEXT: pand %xmm1, %xmm2 19067; SSE2-NEXT: psrlw $2, %xmm0 19068; SSE2-NEXT: pand %xmm1, %xmm0 19069; SSE2-NEXT: paddb %xmm2, %xmm0 19070; SSE2-NEXT: movdqa %xmm0, %xmm1 19071; SSE2-NEXT: psrlw $4, %xmm1 19072; SSE2-NEXT: paddb %xmm0, %xmm1 19073; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19074; SSE2-NEXT: pxor %xmm0, %xmm0 19075; SSE2-NEXT: psadbw %xmm1, %xmm0 19076; SSE2-NEXT: por {{.*}}(%rip), %xmm0 19077; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 19078; SSE2-NEXT: movdqa %xmm0, %xmm2 19079; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 19080; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19081; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 19082; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19083; SSE2-NEXT: pand %xmm3, %xmm1 19084; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19085; SSE2-NEXT: por %xmm1, %xmm0 19086; SSE2-NEXT: retq 19087; 19088; SSE3-LABEL: ugt_7_v2i64: 19089; SSE3: # %bb.0: 19090; SSE3-NEXT: movdqa %xmm0, %xmm1 19091; SSE3-NEXT: psrlw $1, %xmm1 19092; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19093; SSE3-NEXT: psubb %xmm1, %xmm0 19094; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19095; SSE3-NEXT: movdqa %xmm0, %xmm2 19096; SSE3-NEXT: pand %xmm1, %xmm2 19097; SSE3-NEXT: psrlw $2, %xmm0 19098; SSE3-NEXT: pand %xmm1, %xmm0 19099; SSE3-NEXT: paddb %xmm2, %xmm0 19100; SSE3-NEXT: movdqa %xmm0, %xmm1 19101; SSE3-NEXT: psrlw $4, %xmm1 19102; SSE3-NEXT: paddb %xmm0, %xmm1 19103; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19104; SSE3-NEXT: pxor %xmm0, %xmm0 19105; 
SSE3-NEXT: psadbw %xmm1, %xmm0 19106; SSE3-NEXT: por {{.*}}(%rip), %xmm0 19107; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 19108; SSE3-NEXT: movdqa %xmm0, %xmm2 19109; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 19110; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19111; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 19112; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19113; SSE3-NEXT: pand %xmm3, %xmm1 19114; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19115; SSE3-NEXT: por %xmm1, %xmm0 19116; SSE3-NEXT: retq 19117; 19118; SSSE3-LABEL: ugt_7_v2i64: 19119; SSSE3: # %bb.0: 19120; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19121; SSSE3-NEXT: movdqa %xmm0, %xmm2 19122; SSSE3-NEXT: pand %xmm1, %xmm2 19123; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19124; SSSE3-NEXT: movdqa %xmm3, %xmm4 19125; SSSE3-NEXT: pshufb %xmm2, %xmm4 19126; SSSE3-NEXT: psrlw $4, %xmm0 19127; SSSE3-NEXT: pand %xmm1, %xmm0 19128; SSSE3-NEXT: pshufb %xmm0, %xmm3 19129; SSSE3-NEXT: paddb %xmm4, %xmm3 19130; SSSE3-NEXT: pxor %xmm0, %xmm0 19131; SSSE3-NEXT: psadbw %xmm3, %xmm0 19132; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 19133; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 19134; SSSE3-NEXT: movdqa %xmm0, %xmm2 19135; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 19136; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19137; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 19138; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19139; SSSE3-NEXT: pand %xmm3, %xmm1 19140; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19141; SSSE3-NEXT: por %xmm1, %xmm0 19142; SSSE3-NEXT: retq 19143; 19144; SSE41-LABEL: ugt_7_v2i64: 19145; SSE41: # %bb.0: 19146; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19147; SSE41-NEXT: movdqa %xmm0, %xmm2 19148; SSE41-NEXT: pand %xmm1, %xmm2 19149; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19150; SSE41-NEXT: movdqa %xmm3, %xmm4 19151; SSE41-NEXT: pshufb %xmm2, 
%xmm4 19152; SSE41-NEXT: psrlw $4, %xmm0 19153; SSE41-NEXT: pand %xmm1, %xmm0 19154; SSE41-NEXT: pshufb %xmm0, %xmm3 19155; SSE41-NEXT: paddb %xmm4, %xmm3 19156; SSE41-NEXT: pxor %xmm0, %xmm0 19157; SSE41-NEXT: psadbw %xmm3, %xmm0 19158; SSE41-NEXT: por {{.*}}(%rip), %xmm0 19159; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] 19160; SSE41-NEXT: movdqa %xmm0, %xmm2 19161; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 19162; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19163; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 19164; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19165; SSE41-NEXT: pand %xmm3, %xmm1 19166; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19167; SSE41-NEXT: por %xmm1, %xmm0 19168; SSE41-NEXT: retq 19169; 19170; AVX1-LABEL: ugt_7_v2i64: 19171; AVX1: # %bb.0: 19172; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19173; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 19174; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19175; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19176; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 19177; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 19178; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19179; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19180; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 19181; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19182; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19183; AVX1-NEXT: retq 19184; 19185; AVX2-LABEL: ugt_7_v2i64: 19186; AVX2: # %bb.0: 19187; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19188; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 19189; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19190; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19191; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 19192; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 19193; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19194; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19195; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 19196; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19197; AVX2-NEXT: vpcmpgtq 
{{.*}}(%rip), %xmm0, %xmm0 19198; AVX2-NEXT: retq 19199; 19200; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64: 19201; AVX512VPOPCNTDQ: # %bb.0: 19202; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19203; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 19204; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19205; AVX512VPOPCNTDQ-NEXT: vzeroupper 19206; AVX512VPOPCNTDQ-NEXT: retq 19207; 19208; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64: 19209; AVX512VPOPCNTDQVL: # %bb.0: 19210; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 19211; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 19212; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19213; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19214; AVX512VPOPCNTDQVL-NEXT: retq 19215; 19216; BITALG_NOVLX-LABEL: ugt_7_v2i64: 19217; BITALG_NOVLX: # %bb.0: 19218; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19219; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 19220; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 19221; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19222; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19223; BITALG_NOVLX-NEXT: vzeroupper 19224; BITALG_NOVLX-NEXT: retq 19225; 19226; BITALG-LABEL: ugt_7_v2i64: 19227; BITALG: # %bb.0: 19228; BITALG-NEXT: vpopcntb %xmm0, %xmm0 19229; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 19230; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19231; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 19232; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19233; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19234; BITALG-NEXT: retq 19235 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 19236 %3 = icmp ugt <2 x i64> %2, <i64 7, i64 7> 19237 %4 = sext <2 x i1> %3 to <2 x i64> 19238 ret <2 x i64> %4 19239} 19240 19241define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { 19242; SSE2-LABEL: ult_8_v2i64: 19243; SSE2: # %bb.0: 19244; SSE2-NEXT: movdqa %xmm0, %xmm1 19245; SSE2-NEXT: psrlw $1, %xmm1 19246; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19247; SSE2-NEXT: 
psubb %xmm1, %xmm0 19248; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19249; SSE2-NEXT: movdqa %xmm0, %xmm2 19250; SSE2-NEXT: pand %xmm1, %xmm2 19251; SSE2-NEXT: psrlw $2, %xmm0 19252; SSE2-NEXT: pand %xmm1, %xmm0 19253; SSE2-NEXT: paddb %xmm2, %xmm0 19254; SSE2-NEXT: movdqa %xmm0, %xmm1 19255; SSE2-NEXT: psrlw $4, %xmm1 19256; SSE2-NEXT: paddb %xmm0, %xmm1 19257; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19258; SSE2-NEXT: pxor %xmm0, %xmm0 19259; SSE2-NEXT: psadbw %xmm1, %xmm0 19260; SSE2-NEXT: por {{.*}}(%rip), %xmm0 19261; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] 19262; SSE2-NEXT: movdqa %xmm1, %xmm2 19263; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 19264; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19265; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 19266; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19267; SSE2-NEXT: pand %xmm3, %xmm1 19268; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19269; SSE2-NEXT: por %xmm1, %xmm0 19270; SSE2-NEXT: retq 19271; 19272; SSE3-LABEL: ult_8_v2i64: 19273; SSE3: # %bb.0: 19274; SSE3-NEXT: movdqa %xmm0, %xmm1 19275; SSE3-NEXT: psrlw $1, %xmm1 19276; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19277; SSE3-NEXT: psubb %xmm1, %xmm0 19278; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19279; SSE3-NEXT: movdqa %xmm0, %xmm2 19280; SSE3-NEXT: pand %xmm1, %xmm2 19281; SSE3-NEXT: psrlw $2, %xmm0 19282; SSE3-NEXT: pand %xmm1, %xmm0 19283; SSE3-NEXT: paddb %xmm2, %xmm0 19284; SSE3-NEXT: movdqa %xmm0, %xmm1 19285; SSE3-NEXT: psrlw $4, %xmm1 19286; SSE3-NEXT: paddb %xmm0, %xmm1 19287; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19288; SSE3-NEXT: pxor %xmm0, %xmm0 19289; SSE3-NEXT: psadbw %xmm1, %xmm0 19290; SSE3-NEXT: por {{.*}}(%rip), %xmm0 19291; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] 19292; SSE3-NEXT: movdqa %xmm1, %xmm2 19293; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 19294; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19295; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 19296; 
SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19297; SSE3-NEXT: pand %xmm3, %xmm1 19298; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19299; SSE3-NEXT: por %xmm1, %xmm0 19300; SSE3-NEXT: retq 19301; 19302; SSSE3-LABEL: ult_8_v2i64: 19303; SSSE3: # %bb.0: 19304; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19305; SSSE3-NEXT: movdqa %xmm0, %xmm2 19306; SSSE3-NEXT: pand %xmm1, %xmm2 19307; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19308; SSSE3-NEXT: movdqa %xmm3, %xmm4 19309; SSSE3-NEXT: pshufb %xmm2, %xmm4 19310; SSSE3-NEXT: psrlw $4, %xmm0 19311; SSSE3-NEXT: pand %xmm1, %xmm0 19312; SSSE3-NEXT: pshufb %xmm0, %xmm3 19313; SSSE3-NEXT: paddb %xmm4, %xmm3 19314; SSSE3-NEXT: pxor %xmm0, %xmm0 19315; SSSE3-NEXT: psadbw %xmm3, %xmm0 19316; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 19317; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] 19318; SSSE3-NEXT: movdqa %xmm1, %xmm2 19319; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 19320; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19321; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 19322; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19323; SSSE3-NEXT: pand %xmm3, %xmm1 19324; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19325; SSSE3-NEXT: por %xmm1, %xmm0 19326; SSSE3-NEXT: retq 19327; 19328; SSE41-LABEL: ult_8_v2i64: 19329; SSE41: # %bb.0: 19330; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19331; SSE41-NEXT: movdqa %xmm0, %xmm2 19332; SSE41-NEXT: pand %xmm1, %xmm2 19333; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19334; SSE41-NEXT: movdqa %xmm3, %xmm4 19335; SSE41-NEXT: pshufb %xmm2, %xmm4 19336; SSE41-NEXT: psrlw $4, %xmm0 19337; SSE41-NEXT: pand %xmm1, %xmm0 19338; SSE41-NEXT: pshufb %xmm0, %xmm3 19339; SSE41-NEXT: paddb %xmm4, %xmm3 19340; SSE41-NEXT: pxor %xmm0, %xmm0 19341; SSE41-NEXT: psadbw %xmm3, %xmm0 19342; SSE41-NEXT: por {{.*}}(%rip), %xmm0 19343; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[2147483656,2147483656] 19344; SSE41-NEXT: movdqa %xmm1, %xmm2 19345; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 19346; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19347; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 19348; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19349; SSE41-NEXT: pand %xmm3, %xmm1 19350; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19351; SSE41-NEXT: por %xmm1, %xmm0 19352; SSE41-NEXT: retq 19353; 19354; AVX1-LABEL: ult_8_v2i64: 19355; AVX1: # %bb.0: 19356; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19357; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 19358; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19359; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19360; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 19361; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 19362; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19363; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19364; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 19365; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19366; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] 19367; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19368; AVX1-NEXT: retq 19369; 19370; AVX2-LABEL: ult_8_v2i64: 19371; AVX2: # %bb.0: 19372; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19373; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 19374; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19375; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19376; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 19377; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 19378; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19379; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19380; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 19381; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19382; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] 19383; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19384; AVX2-NEXT: retq 19385; 19386; AVX512VPOPCNTDQ-LABEL: ult_8_v2i64: 19387; AVX512VPOPCNTDQ: # %bb.0: 19388; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19389; AVX512VPOPCNTDQ-NEXT: 
vpopcntq %zmm0, %zmm0 19390; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] 19391; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19392; AVX512VPOPCNTDQ-NEXT: vzeroupper 19393; AVX512VPOPCNTDQ-NEXT: retq 19394; 19395; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64: 19396; AVX512VPOPCNTDQVL: # %bb.0: 19397; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 19398; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 19399; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19400; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19401; AVX512VPOPCNTDQVL-NEXT: retq 19402; 19403; BITALG_NOVLX-LABEL: ult_8_v2i64: 19404; BITALG_NOVLX: # %bb.0: 19405; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19406; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 19407; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 19408; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19409; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] 19410; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19411; BITALG_NOVLX-NEXT: vzeroupper 19412; BITALG_NOVLX-NEXT: retq 19413; 19414; BITALG-LABEL: ult_8_v2i64: 19415; BITALG: # %bb.0: 19416; BITALG-NEXT: vpopcntb %xmm0, %xmm0 19417; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 19418; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19419; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 19420; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19421; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19422; BITALG-NEXT: retq 19423 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 19424 %3 = icmp ult <2 x i64> %2, <i64 8, i64 8> 19425 %4 = sext <2 x i1> %3 to <2 x i64> 19426 ret <2 x i64> %4 19427} 19428 19429define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { 19430; SSE2-LABEL: ugt_8_v2i64: 19431; SSE2: # %bb.0: 19432; SSE2-NEXT: movdqa %xmm0, %xmm1 19433; SSE2-NEXT: psrlw $1, %xmm1 19434; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19435; SSE2-NEXT: psubb %xmm1, %xmm0 19436; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19437; SSE2-NEXT: 
movdqa %xmm0, %xmm2 19438; SSE2-NEXT: pand %xmm1, %xmm2 19439; SSE2-NEXT: psrlw $2, %xmm0 19440; SSE2-NEXT: pand %xmm1, %xmm0 19441; SSE2-NEXT: paddb %xmm2, %xmm0 19442; SSE2-NEXT: movdqa %xmm0, %xmm1 19443; SSE2-NEXT: psrlw $4, %xmm1 19444; SSE2-NEXT: paddb %xmm0, %xmm1 19445; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19446; SSE2-NEXT: pxor %xmm0, %xmm0 19447; SSE2-NEXT: psadbw %xmm1, %xmm0 19448; SSE2-NEXT: por {{.*}}(%rip), %xmm0 19449; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] 19450; SSE2-NEXT: movdqa %xmm0, %xmm2 19451; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 19452; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19453; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 19454; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19455; SSE2-NEXT: pand %xmm3, %xmm1 19456; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19457; SSE2-NEXT: por %xmm1, %xmm0 19458; SSE2-NEXT: retq 19459; 19460; SSE3-LABEL: ugt_8_v2i64: 19461; SSE3: # %bb.0: 19462; SSE3-NEXT: movdqa %xmm0, %xmm1 19463; SSE3-NEXT: psrlw $1, %xmm1 19464; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19465; SSE3-NEXT: psubb %xmm1, %xmm0 19466; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19467; SSE3-NEXT: movdqa %xmm0, %xmm2 19468; SSE3-NEXT: pand %xmm1, %xmm2 19469; SSE3-NEXT: psrlw $2, %xmm0 19470; SSE3-NEXT: pand %xmm1, %xmm0 19471; SSE3-NEXT: paddb %xmm2, %xmm0 19472; SSE3-NEXT: movdqa %xmm0, %xmm1 19473; SSE3-NEXT: psrlw $4, %xmm1 19474; SSE3-NEXT: paddb %xmm0, %xmm1 19475; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19476; SSE3-NEXT: pxor %xmm0, %xmm0 19477; SSE3-NEXT: psadbw %xmm1, %xmm0 19478; SSE3-NEXT: por {{.*}}(%rip), %xmm0 19479; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] 19480; SSE3-NEXT: movdqa %xmm0, %xmm2 19481; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 19482; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19483; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 19484; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19485; SSE3-NEXT: pand %xmm3, %xmm1 19486; SSE3-NEXT: pshufd {{.*#+}} xmm0 = 
xmm2[1,1,3,3] 19487; SSE3-NEXT: por %xmm1, %xmm0 19488; SSE3-NEXT: retq 19489; 19490; SSSE3-LABEL: ugt_8_v2i64: 19491; SSSE3: # %bb.0: 19492; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19493; SSSE3-NEXT: movdqa %xmm0, %xmm2 19494; SSSE3-NEXT: pand %xmm1, %xmm2 19495; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19496; SSSE3-NEXT: movdqa %xmm3, %xmm4 19497; SSSE3-NEXT: pshufb %xmm2, %xmm4 19498; SSSE3-NEXT: psrlw $4, %xmm0 19499; SSSE3-NEXT: pand %xmm1, %xmm0 19500; SSSE3-NEXT: pshufb %xmm0, %xmm3 19501; SSSE3-NEXT: paddb %xmm4, %xmm3 19502; SSSE3-NEXT: pxor %xmm0, %xmm0 19503; SSSE3-NEXT: psadbw %xmm3, %xmm0 19504; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 19505; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] 19506; SSSE3-NEXT: movdqa %xmm0, %xmm2 19507; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 19508; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19509; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 19510; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19511; SSSE3-NEXT: pand %xmm3, %xmm1 19512; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19513; SSSE3-NEXT: por %xmm1, %xmm0 19514; SSSE3-NEXT: retq 19515; 19516; SSE41-LABEL: ugt_8_v2i64: 19517; SSE41: # %bb.0: 19518; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19519; SSE41-NEXT: movdqa %xmm0, %xmm2 19520; SSE41-NEXT: pand %xmm1, %xmm2 19521; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19522; SSE41-NEXT: movdqa %xmm3, %xmm4 19523; SSE41-NEXT: pshufb %xmm2, %xmm4 19524; SSE41-NEXT: psrlw $4, %xmm0 19525; SSE41-NEXT: pand %xmm1, %xmm0 19526; SSE41-NEXT: pshufb %xmm0, %xmm3 19527; SSE41-NEXT: paddb %xmm4, %xmm3 19528; SSE41-NEXT: pxor %xmm0, %xmm0 19529; SSE41-NEXT: psadbw %xmm3, %xmm0 19530; SSE41-NEXT: por {{.*}}(%rip), %xmm0 19531; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] 19532; SSE41-NEXT: movdqa %xmm0, %xmm2 19533; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 19534; SSE41-NEXT: pshufd 
{{.*#+}} xmm3 = xmm2[0,0,2,2] 19535; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 19536; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19537; SSE41-NEXT: pand %xmm3, %xmm1 19538; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19539; SSE41-NEXT: por %xmm1, %xmm0 19540; SSE41-NEXT: retq 19541; 19542; AVX1-LABEL: ugt_8_v2i64: 19543; AVX1: # %bb.0: 19544; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19545; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 19546; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19547; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19548; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 19549; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 19550; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19551; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19552; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 19553; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19554; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19555; AVX1-NEXT: retq 19556; 19557; AVX2-LABEL: ugt_8_v2i64: 19558; AVX2: # %bb.0: 19559; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19560; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 19561; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19562; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19563; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 19564; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 19565; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19566; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19567; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 19568; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19569; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19570; AVX2-NEXT: retq 19571; 19572; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64: 19573; AVX512VPOPCNTDQ: # %bb.0: 19574; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19575; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 19576; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19577; AVX512VPOPCNTDQ-NEXT: vzeroupper 19578; AVX512VPOPCNTDQ-NEXT: retq 19579; 19580; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64: 
19581; AVX512VPOPCNTDQVL: # %bb.0: 19582; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 19583; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 19584; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19585; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19586; AVX512VPOPCNTDQVL-NEXT: retq 19587; 19588; BITALG_NOVLX-LABEL: ugt_8_v2i64: 19589; BITALG_NOVLX: # %bb.0: 19590; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19591; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 19592; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 19593; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19594; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19595; BITALG_NOVLX-NEXT: vzeroupper 19596; BITALG_NOVLX-NEXT: retq 19597; 19598; BITALG-LABEL: ugt_8_v2i64: 19599; BITALG: # %bb.0: 19600; BITALG-NEXT: vpopcntb %xmm0, %xmm0 19601; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 19602; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19603; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 19604; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19605; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19606; BITALG-NEXT: retq 19607 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 19608 %3 = icmp ugt <2 x i64> %2, <i64 8, i64 8> 19609 %4 = sext <2 x i1> %3 to <2 x i64> 19610 ret <2 x i64> %4 19611} 19612 19613define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { 19614; SSE2-LABEL: ult_9_v2i64: 19615; SSE2: # %bb.0: 19616; SSE2-NEXT: movdqa %xmm0, %xmm1 19617; SSE2-NEXT: psrlw $1, %xmm1 19618; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19619; SSE2-NEXT: psubb %xmm1, %xmm0 19620; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19621; SSE2-NEXT: movdqa %xmm0, %xmm2 19622; SSE2-NEXT: pand %xmm1, %xmm2 19623; SSE2-NEXT: psrlw $2, %xmm0 19624; SSE2-NEXT: pand %xmm1, %xmm0 19625; SSE2-NEXT: paddb %xmm2, %xmm0 19626; SSE2-NEXT: movdqa %xmm0, %xmm1 19627; SSE2-NEXT: psrlw $4, %xmm1 19628; SSE2-NEXT: paddb %xmm0, %xmm1 19629; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 
19630; SSE2-NEXT: pxor %xmm0, %xmm0 19631; SSE2-NEXT: psadbw %xmm1, %xmm0 19632; SSE2-NEXT: por {{.*}}(%rip), %xmm0 19633; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19634; SSE2-NEXT: movdqa %xmm1, %xmm2 19635; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 19636; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19637; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 19638; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19639; SSE2-NEXT: pand %xmm3, %xmm1 19640; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19641; SSE2-NEXT: por %xmm1, %xmm0 19642; SSE2-NEXT: retq 19643; 19644; SSE3-LABEL: ult_9_v2i64: 19645; SSE3: # %bb.0: 19646; SSE3-NEXT: movdqa %xmm0, %xmm1 19647; SSE3-NEXT: psrlw $1, %xmm1 19648; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19649; SSE3-NEXT: psubb %xmm1, %xmm0 19650; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19651; SSE3-NEXT: movdqa %xmm0, %xmm2 19652; SSE3-NEXT: pand %xmm1, %xmm2 19653; SSE3-NEXT: psrlw $2, %xmm0 19654; SSE3-NEXT: pand %xmm1, %xmm0 19655; SSE3-NEXT: paddb %xmm2, %xmm0 19656; SSE3-NEXT: movdqa %xmm0, %xmm1 19657; SSE3-NEXT: psrlw $4, %xmm1 19658; SSE3-NEXT: paddb %xmm0, %xmm1 19659; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19660; SSE3-NEXT: pxor %xmm0, %xmm0 19661; SSE3-NEXT: psadbw %xmm1, %xmm0 19662; SSE3-NEXT: por {{.*}}(%rip), %xmm0 19663; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19664; SSE3-NEXT: movdqa %xmm1, %xmm2 19665; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 19666; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19667; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 19668; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19669; SSE3-NEXT: pand %xmm3, %xmm1 19670; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19671; SSE3-NEXT: por %xmm1, %xmm0 19672; SSE3-NEXT: retq 19673; 19674; SSSE3-LABEL: ult_9_v2i64: 19675; SSSE3: # %bb.0: 19676; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19677; SSSE3-NEXT: movdqa %xmm0, %xmm2 19678; SSSE3-NEXT: pand %xmm1, %xmm2 19679; SSSE3-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19680; SSSE3-NEXT: movdqa %xmm3, %xmm4 19681; SSSE3-NEXT: pshufb %xmm2, %xmm4 19682; SSSE3-NEXT: psrlw $4, %xmm0 19683; SSSE3-NEXT: pand %xmm1, %xmm0 19684; SSSE3-NEXT: pshufb %xmm0, %xmm3 19685; SSSE3-NEXT: paddb %xmm4, %xmm3 19686; SSSE3-NEXT: pxor %xmm0, %xmm0 19687; SSSE3-NEXT: psadbw %xmm3, %xmm0 19688; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 19689; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19690; SSSE3-NEXT: movdqa %xmm1, %xmm2 19691; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 19692; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19693; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 19694; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19695; SSSE3-NEXT: pand %xmm3, %xmm1 19696; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19697; SSSE3-NEXT: por %xmm1, %xmm0 19698; SSSE3-NEXT: retq 19699; 19700; SSE41-LABEL: ult_9_v2i64: 19701; SSE41: # %bb.0: 19702; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19703; SSE41-NEXT: movdqa %xmm0, %xmm2 19704; SSE41-NEXT: pand %xmm1, %xmm2 19705; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19706; SSE41-NEXT: movdqa %xmm3, %xmm4 19707; SSE41-NEXT: pshufb %xmm2, %xmm4 19708; SSE41-NEXT: psrlw $4, %xmm0 19709; SSE41-NEXT: pand %xmm1, %xmm0 19710; SSE41-NEXT: pshufb %xmm0, %xmm3 19711; SSE41-NEXT: paddb %xmm4, %xmm3 19712; SSE41-NEXT: pxor %xmm0, %xmm0 19713; SSE41-NEXT: psadbw %xmm3, %xmm0 19714; SSE41-NEXT: por {{.*}}(%rip), %xmm0 19715; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19716; SSE41-NEXT: movdqa %xmm1, %xmm2 19717; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 19718; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19719; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 19720; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19721; SSE41-NEXT: pand %xmm3, %xmm1 19722; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19723; SSE41-NEXT: por %xmm1, %xmm0 19724; SSE41-NEXT: retq 19725; 19726; AVX1-LABEL: ult_9_v2i64: 
19727; AVX1: # %bb.0: 19728; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19729; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 19730; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19731; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19732; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 19733; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 19734; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19735; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19736; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 19737; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19738; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] 19739; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19740; AVX1-NEXT: retq 19741; 19742; AVX2-LABEL: ult_9_v2i64: 19743; AVX2: # %bb.0: 19744; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19745; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 19746; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19747; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19748; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 19749; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 19750; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19751; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19752; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 19753; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19754; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] 19755; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19756; AVX2-NEXT: retq 19757; 19758; AVX512VPOPCNTDQ-LABEL: ult_9_v2i64: 19759; AVX512VPOPCNTDQ: # %bb.0: 19760; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19761; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 19762; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] 19763; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19764; AVX512VPOPCNTDQ-NEXT: vzeroupper 19765; AVX512VPOPCNTDQ-NEXT: retq 19766; 19767; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64: 19768; AVX512VPOPCNTDQVL: # %bb.0: 19769; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 19770; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 19771; 
AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19772; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19773; AVX512VPOPCNTDQVL-NEXT: retq 19774; 19775; BITALG_NOVLX-LABEL: ult_9_v2i64: 19776; BITALG_NOVLX: # %bb.0: 19777; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19778; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 19779; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 19780; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19781; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] 19782; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 19783; BITALG_NOVLX-NEXT: vzeroupper 19784; BITALG_NOVLX-NEXT: retq 19785; 19786; BITALG-LABEL: ult_9_v2i64: 19787; BITALG: # %bb.0: 19788; BITALG-NEXT: vpopcntb %xmm0, %xmm0 19789; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 19790; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19791; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 19792; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19793; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19794; BITALG-NEXT: retq 19795 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 19796 %3 = icmp ult <2 x i64> %2, <i64 9, i64 9> 19797 %4 = sext <2 x i1> %3 to <2 x i64> 19798 ret <2 x i64> %4 19799} 19800 19801define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { 19802; SSE2-LABEL: ugt_9_v2i64: 19803; SSE2: # %bb.0: 19804; SSE2-NEXT: movdqa %xmm0, %xmm1 19805; SSE2-NEXT: psrlw $1, %xmm1 19806; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19807; SSE2-NEXT: psubb %xmm1, %xmm0 19808; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19809; SSE2-NEXT: movdqa %xmm0, %xmm2 19810; SSE2-NEXT: pand %xmm1, %xmm2 19811; SSE2-NEXT: psrlw $2, %xmm0 19812; SSE2-NEXT: pand %xmm1, %xmm0 19813; SSE2-NEXT: paddb %xmm2, %xmm0 19814; SSE2-NEXT: movdqa %xmm0, %xmm1 19815; SSE2-NEXT: psrlw $4, %xmm1 19816; SSE2-NEXT: paddb %xmm0, %xmm1 19817; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19818; SSE2-NEXT: pxor %xmm0, %xmm0 19819; SSE2-NEXT: psadbw %xmm1, %xmm0 19820; SSE2-NEXT: por {{.*}}(%rip), %xmm0 
19821; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19822; SSE2-NEXT: movdqa %xmm0, %xmm2 19823; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 19824; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19825; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 19826; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19827; SSE2-NEXT: pand %xmm3, %xmm1 19828; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19829; SSE2-NEXT: por %xmm1, %xmm0 19830; SSE2-NEXT: retq 19831; 19832; SSE3-LABEL: ugt_9_v2i64: 19833; SSE3: # %bb.0: 19834; SSE3-NEXT: movdqa %xmm0, %xmm1 19835; SSE3-NEXT: psrlw $1, %xmm1 19836; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19837; SSE3-NEXT: psubb %xmm1, %xmm0 19838; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19839; SSE3-NEXT: movdqa %xmm0, %xmm2 19840; SSE3-NEXT: pand %xmm1, %xmm2 19841; SSE3-NEXT: psrlw $2, %xmm0 19842; SSE3-NEXT: pand %xmm1, %xmm0 19843; SSE3-NEXT: paddb %xmm2, %xmm0 19844; SSE3-NEXT: movdqa %xmm0, %xmm1 19845; SSE3-NEXT: psrlw $4, %xmm1 19846; SSE3-NEXT: paddb %xmm0, %xmm1 19847; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 19848; SSE3-NEXT: pxor %xmm0, %xmm0 19849; SSE3-NEXT: psadbw %xmm1, %xmm0 19850; SSE3-NEXT: por {{.*}}(%rip), %xmm0 19851; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19852; SSE3-NEXT: movdqa %xmm0, %xmm2 19853; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 19854; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19855; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 19856; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19857; SSE3-NEXT: pand %xmm3, %xmm1 19858; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19859; SSE3-NEXT: por %xmm1, %xmm0 19860; SSE3-NEXT: retq 19861; 19862; SSSE3-LABEL: ugt_9_v2i64: 19863; SSSE3: # %bb.0: 19864; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19865; SSSE3-NEXT: movdqa %xmm0, %xmm2 19866; SSSE3-NEXT: pand %xmm1, %xmm2 19867; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19868; SSSE3-NEXT: movdqa %xmm3, %xmm4 19869; SSSE3-NEXT: 
pshufb %xmm2, %xmm4 19870; SSSE3-NEXT: psrlw $4, %xmm0 19871; SSSE3-NEXT: pand %xmm1, %xmm0 19872; SSSE3-NEXT: pshufb %xmm0, %xmm3 19873; SSSE3-NEXT: paddb %xmm4, %xmm3 19874; SSSE3-NEXT: pxor %xmm0, %xmm0 19875; SSSE3-NEXT: psadbw %xmm3, %xmm0 19876; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 19877; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19878; SSSE3-NEXT: movdqa %xmm0, %xmm2 19879; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 19880; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19881; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 19882; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19883; SSSE3-NEXT: pand %xmm3, %xmm1 19884; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19885; SSSE3-NEXT: por %xmm1, %xmm0 19886; SSSE3-NEXT: retq 19887; 19888; SSE41-LABEL: ugt_9_v2i64: 19889; SSE41: # %bb.0: 19890; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19891; SSE41-NEXT: movdqa %xmm0, %xmm2 19892; SSE41-NEXT: pand %xmm1, %xmm2 19893; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19894; SSE41-NEXT: movdqa %xmm3, %xmm4 19895; SSE41-NEXT: pshufb %xmm2, %xmm4 19896; SSE41-NEXT: psrlw $4, %xmm0 19897; SSE41-NEXT: pand %xmm1, %xmm0 19898; SSE41-NEXT: pshufb %xmm0, %xmm3 19899; SSE41-NEXT: paddb %xmm4, %xmm3 19900; SSE41-NEXT: pxor %xmm0, %xmm0 19901; SSE41-NEXT: psadbw %xmm3, %xmm0 19902; SSE41-NEXT: por {{.*}}(%rip), %xmm0 19903; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] 19904; SSE41-NEXT: movdqa %xmm0, %xmm2 19905; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 19906; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 19907; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 19908; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 19909; SSE41-NEXT: pand %xmm3, %xmm1 19910; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 19911; SSE41-NEXT: por %xmm1, %xmm0 19912; SSE41-NEXT: retq 19913; 19914; AVX1-LABEL: ugt_9_v2i64: 19915; AVX1: # %bb.0: 19916; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 
19917; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 19918; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19919; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19920; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 19921; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 19922; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19923; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19924; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 19925; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19926; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19927; AVX1-NEXT: retq 19928; 19929; AVX2-LABEL: ugt_9_v2i64: 19930; AVX2: # %bb.0: 19931; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 19932; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 19933; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 19934; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 19935; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 19936; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 19937; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 19938; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 19939; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 19940; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19941; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19942; AVX2-NEXT: retq 19943; 19944; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64: 19945; AVX512VPOPCNTDQ: # %bb.0: 19946; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19947; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 19948; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19949; AVX512VPOPCNTDQ-NEXT: vzeroupper 19950; AVX512VPOPCNTDQ-NEXT: retq 19951; 19952; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64: 19953; AVX512VPOPCNTDQVL: # %bb.0: 19954; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 19955; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 19956; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19957; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19958; AVX512VPOPCNTDQVL-NEXT: retq 19959; 19960; BITALG_NOVLX-LABEL: ugt_9_v2i64: 19961; BITALG_NOVLX: # %bb.0: 19962; BITALG_NOVLX-NEXT: # kill: def 
$xmm0 killed $xmm0 def $zmm0 19963; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 19964; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 19965; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19966; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 19967; BITALG_NOVLX-NEXT: vzeroupper 19968; BITALG_NOVLX-NEXT: retq 19969; 19970; BITALG-LABEL: ugt_9_v2i64: 19971; BITALG: # %bb.0: 19972; BITALG-NEXT: vpopcntb %xmm0, %xmm0 19973; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 19974; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 19975; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 19976; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 19977; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 19978; BITALG-NEXT: retq 19979 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 19980 %3 = icmp ugt <2 x i64> %2, <i64 9, i64 9> 19981 %4 = sext <2 x i1> %3 to <2 x i64> 19982 ret <2 x i64> %4 19983} 19984 19985define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { 19986; SSE2-LABEL: ult_10_v2i64: 19987; SSE2: # %bb.0: 19988; SSE2-NEXT: movdqa %xmm0, %xmm1 19989; SSE2-NEXT: psrlw $1, %xmm1 19990; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 19991; SSE2-NEXT: psubb %xmm1, %xmm0 19992; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 19993; SSE2-NEXT: movdqa %xmm0, %xmm2 19994; SSE2-NEXT: pand %xmm1, %xmm2 19995; SSE2-NEXT: psrlw $2, %xmm0 19996; SSE2-NEXT: pand %xmm1, %xmm0 19997; SSE2-NEXT: paddb %xmm2, %xmm0 19998; SSE2-NEXT: movdqa %xmm0, %xmm1 19999; SSE2-NEXT: psrlw $4, %xmm1 20000; SSE2-NEXT: paddb %xmm0, %xmm1 20001; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20002; SSE2-NEXT: pxor %xmm0, %xmm0 20003; SSE2-NEXT: psadbw %xmm1, %xmm0 20004; SSE2-NEXT: por {{.*}}(%rip), %xmm0 20005; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] 20006; SSE2-NEXT: movdqa %xmm1, %xmm2 20007; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 20008; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20009; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 20010; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20011; SSE2-NEXT: pand 
%xmm3, %xmm1 20012; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20013; SSE2-NEXT: por %xmm1, %xmm0 20014; SSE2-NEXT: retq 20015; 20016; SSE3-LABEL: ult_10_v2i64: 20017; SSE3: # %bb.0: 20018; SSE3-NEXT: movdqa %xmm0, %xmm1 20019; SSE3-NEXT: psrlw $1, %xmm1 20020; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20021; SSE3-NEXT: psubb %xmm1, %xmm0 20022; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20023; SSE3-NEXT: movdqa %xmm0, %xmm2 20024; SSE3-NEXT: pand %xmm1, %xmm2 20025; SSE3-NEXT: psrlw $2, %xmm0 20026; SSE3-NEXT: pand %xmm1, %xmm0 20027; SSE3-NEXT: paddb %xmm2, %xmm0 20028; SSE3-NEXT: movdqa %xmm0, %xmm1 20029; SSE3-NEXT: psrlw $4, %xmm1 20030; SSE3-NEXT: paddb %xmm0, %xmm1 20031; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20032; SSE3-NEXT: pxor %xmm0, %xmm0 20033; SSE3-NEXT: psadbw %xmm1, %xmm0 20034; SSE3-NEXT: por {{.*}}(%rip), %xmm0 20035; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] 20036; SSE3-NEXT: movdqa %xmm1, %xmm2 20037; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 20038; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20039; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 20040; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20041; SSE3-NEXT: pand %xmm3, %xmm1 20042; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20043; SSE3-NEXT: por %xmm1, %xmm0 20044; SSE3-NEXT: retq 20045; 20046; SSSE3-LABEL: ult_10_v2i64: 20047; SSSE3: # %bb.0: 20048; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20049; SSSE3-NEXT: movdqa %xmm0, %xmm2 20050; SSSE3-NEXT: pand %xmm1, %xmm2 20051; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20052; SSSE3-NEXT: movdqa %xmm3, %xmm4 20053; SSSE3-NEXT: pshufb %xmm2, %xmm4 20054; SSSE3-NEXT: psrlw $4, %xmm0 20055; SSSE3-NEXT: pand %xmm1, %xmm0 20056; SSSE3-NEXT: pshufb %xmm0, %xmm3 20057; SSSE3-NEXT: paddb %xmm4, %xmm3 20058; SSSE3-NEXT: pxor %xmm0, %xmm0 20059; SSSE3-NEXT: psadbw %xmm3, %xmm0 20060; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 20061; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483658,2147483658] 20062; SSSE3-NEXT: movdqa %xmm1, %xmm2 20063; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 20064; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20065; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 20066; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20067; SSSE3-NEXT: pand %xmm3, %xmm1 20068; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20069; SSSE3-NEXT: por %xmm1, %xmm0 20070; SSSE3-NEXT: retq 20071; 20072; SSE41-LABEL: ult_10_v2i64: 20073; SSE41: # %bb.0: 20074; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20075; SSE41-NEXT: movdqa %xmm0, %xmm2 20076; SSE41-NEXT: pand %xmm1, %xmm2 20077; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20078; SSE41-NEXT: movdqa %xmm3, %xmm4 20079; SSE41-NEXT: pshufb %xmm2, %xmm4 20080; SSE41-NEXT: psrlw $4, %xmm0 20081; SSE41-NEXT: pand %xmm1, %xmm0 20082; SSE41-NEXT: pshufb %xmm0, %xmm3 20083; SSE41-NEXT: paddb %xmm4, %xmm3 20084; SSE41-NEXT: pxor %xmm0, %xmm0 20085; SSE41-NEXT: psadbw %xmm3, %xmm0 20086; SSE41-NEXT: por {{.*}}(%rip), %xmm0 20087; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] 20088; SSE41-NEXT: movdqa %xmm1, %xmm2 20089; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 20090; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20091; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 20092; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20093; SSE41-NEXT: pand %xmm3, %xmm1 20094; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20095; SSE41-NEXT: por %xmm1, %xmm0 20096; SSE41-NEXT: retq 20097; 20098; AVX1-LABEL: ult_10_v2i64: 20099; AVX1: # %bb.0: 20100; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20101; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 20102; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20103; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20104; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 20105; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 20106; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20107; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 20108; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 20109; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20110; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] 20111; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20112; AVX1-NEXT: retq 20113; 20114; AVX2-LABEL: ult_10_v2i64: 20115; AVX2: # %bb.0: 20116; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20117; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 20118; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20119; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20120; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 20121; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 20122; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20123; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20124; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 20125; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20126; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] 20127; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20128; AVX2-NEXT: retq 20129; 20130; AVX512VPOPCNTDQ-LABEL: ult_10_v2i64: 20131; AVX512VPOPCNTDQ: # %bb.0: 20132; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20133; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 20134; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] 20135; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20136; AVX512VPOPCNTDQ-NEXT: vzeroupper 20137; AVX512VPOPCNTDQ-NEXT: retq 20138; 20139; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64: 20140; AVX512VPOPCNTDQVL: # %bb.0: 20141; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 20142; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 20143; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20144; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20145; AVX512VPOPCNTDQVL-NEXT: retq 20146; 20147; BITALG_NOVLX-LABEL: ult_10_v2i64: 20148; BITALG_NOVLX: # %bb.0: 20149; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20150; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 20151; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 20152; BITALG_NOVLX-NEXT: vpsadbw %xmm1, 
%xmm0, %xmm0 20153; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] 20154; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20155; BITALG_NOVLX-NEXT: vzeroupper 20156; BITALG_NOVLX-NEXT: retq 20157; 20158; BITALG-LABEL: ult_10_v2i64: 20159; BITALG: # %bb.0: 20160; BITALG-NEXT: vpopcntb %xmm0, %xmm0 20161; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 20162; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20163; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 20164; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20165; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20166; BITALG-NEXT: retq 20167 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 20168 %3 = icmp ult <2 x i64> %2, <i64 10, i64 10> 20169 %4 = sext <2 x i1> %3 to <2 x i64> 20170 ret <2 x i64> %4 20171} 20172 20173define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { 20174; SSE2-LABEL: ugt_10_v2i64: 20175; SSE2: # %bb.0: 20176; SSE2-NEXT: movdqa %xmm0, %xmm1 20177; SSE2-NEXT: psrlw $1, %xmm1 20178; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20179; SSE2-NEXT: psubb %xmm1, %xmm0 20180; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20181; SSE2-NEXT: movdqa %xmm0, %xmm2 20182; SSE2-NEXT: pand %xmm1, %xmm2 20183; SSE2-NEXT: psrlw $2, %xmm0 20184; SSE2-NEXT: pand %xmm1, %xmm0 20185; SSE2-NEXT: paddb %xmm2, %xmm0 20186; SSE2-NEXT: movdqa %xmm0, %xmm1 20187; SSE2-NEXT: psrlw $4, %xmm1 20188; SSE2-NEXT: paddb %xmm0, %xmm1 20189; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20190; SSE2-NEXT: pxor %xmm0, %xmm0 20191; SSE2-NEXT: psadbw %xmm1, %xmm0 20192; SSE2-NEXT: por {{.*}}(%rip), %xmm0 20193; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] 20194; SSE2-NEXT: movdqa %xmm0, %xmm2 20195; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 20196; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20197; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 20198; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20199; SSE2-NEXT: pand %xmm3, %xmm1 20200; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20201; SSE2-NEXT: por %xmm1, %xmm0 20202; 
SSE2-NEXT: retq 20203; 20204; SSE3-LABEL: ugt_10_v2i64: 20205; SSE3: # %bb.0: 20206; SSE3-NEXT: movdqa %xmm0, %xmm1 20207; SSE3-NEXT: psrlw $1, %xmm1 20208; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20209; SSE3-NEXT: psubb %xmm1, %xmm0 20210; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20211; SSE3-NEXT: movdqa %xmm0, %xmm2 20212; SSE3-NEXT: pand %xmm1, %xmm2 20213; SSE3-NEXT: psrlw $2, %xmm0 20214; SSE3-NEXT: pand %xmm1, %xmm0 20215; SSE3-NEXT: paddb %xmm2, %xmm0 20216; SSE3-NEXT: movdqa %xmm0, %xmm1 20217; SSE3-NEXT: psrlw $4, %xmm1 20218; SSE3-NEXT: paddb %xmm0, %xmm1 20219; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20220; SSE3-NEXT: pxor %xmm0, %xmm0 20221; SSE3-NEXT: psadbw %xmm1, %xmm0 20222; SSE3-NEXT: por {{.*}}(%rip), %xmm0 20223; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] 20224; SSE3-NEXT: movdqa %xmm0, %xmm2 20225; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 20226; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20227; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 20228; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20229; SSE3-NEXT: pand %xmm3, %xmm1 20230; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20231; SSE3-NEXT: por %xmm1, %xmm0 20232; SSE3-NEXT: retq 20233; 20234; SSSE3-LABEL: ugt_10_v2i64: 20235; SSSE3: # %bb.0: 20236; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20237; SSSE3-NEXT: movdqa %xmm0, %xmm2 20238; SSSE3-NEXT: pand %xmm1, %xmm2 20239; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20240; SSSE3-NEXT: movdqa %xmm3, %xmm4 20241; SSSE3-NEXT: pshufb %xmm2, %xmm4 20242; SSSE3-NEXT: psrlw $4, %xmm0 20243; SSSE3-NEXT: pand %xmm1, %xmm0 20244; SSSE3-NEXT: pshufb %xmm0, %xmm3 20245; SSSE3-NEXT: paddb %xmm4, %xmm3 20246; SSSE3-NEXT: pxor %xmm0, %xmm0 20247; SSSE3-NEXT: psadbw %xmm3, %xmm0 20248; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 20249; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] 20250; SSSE3-NEXT: movdqa %xmm0, %xmm2 20251; SSSE3-NEXT: pcmpgtd 
%xmm1, %xmm2 20252; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20253; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 20254; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20255; SSSE3-NEXT: pand %xmm3, %xmm1 20256; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20257; SSSE3-NEXT: por %xmm1, %xmm0 20258; SSSE3-NEXT: retq 20259; 20260; SSE41-LABEL: ugt_10_v2i64: 20261; SSE41: # %bb.0: 20262; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20263; SSE41-NEXT: movdqa %xmm0, %xmm2 20264; SSE41-NEXT: pand %xmm1, %xmm2 20265; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20266; SSE41-NEXT: movdqa %xmm3, %xmm4 20267; SSE41-NEXT: pshufb %xmm2, %xmm4 20268; SSE41-NEXT: psrlw $4, %xmm0 20269; SSE41-NEXT: pand %xmm1, %xmm0 20270; SSE41-NEXT: pshufb %xmm0, %xmm3 20271; SSE41-NEXT: paddb %xmm4, %xmm3 20272; SSE41-NEXT: pxor %xmm0, %xmm0 20273; SSE41-NEXT: psadbw %xmm3, %xmm0 20274; SSE41-NEXT: por {{.*}}(%rip), %xmm0 20275; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] 20276; SSE41-NEXT: movdqa %xmm0, %xmm2 20277; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 20278; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20279; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 20280; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20281; SSE41-NEXT: pand %xmm3, %xmm1 20282; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20283; SSE41-NEXT: por %xmm1, %xmm0 20284; SSE41-NEXT: retq 20285; 20286; AVX1-LABEL: ugt_10_v2i64: 20287; AVX1: # %bb.0: 20288; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20289; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 20290; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20291; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20292; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 20293; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 20294; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20295; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20296; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 20297; AVX1-NEXT: vpsadbw %xmm1, 
%xmm0, %xmm0 20298; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20299; AVX1-NEXT: retq 20300; 20301; AVX2-LABEL: ugt_10_v2i64: 20302; AVX2: # %bb.0: 20303; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20304; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 20305; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20306; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20307; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 20308; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 20309; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20310; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20311; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 20312; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20313; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20314; AVX2-NEXT: retq 20315; 20316; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64: 20317; AVX512VPOPCNTDQ: # %bb.0: 20318; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20319; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 20320; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20321; AVX512VPOPCNTDQ-NEXT: vzeroupper 20322; AVX512VPOPCNTDQ-NEXT: retq 20323; 20324; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64: 20325; AVX512VPOPCNTDQVL: # %bb.0: 20326; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 20327; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 20328; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20329; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20330; AVX512VPOPCNTDQVL-NEXT: retq 20331; 20332; BITALG_NOVLX-LABEL: ugt_10_v2i64: 20333; BITALG_NOVLX: # %bb.0: 20334; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20335; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 20336; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 20337; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20338; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20339; BITALG_NOVLX-NEXT: vzeroupper 20340; BITALG_NOVLX-NEXT: retq 20341; 20342; BITALG-LABEL: ugt_10_v2i64: 20343; BITALG: # %bb.0: 20344; BITALG-NEXT: vpopcntb %xmm0, 
%xmm0 20345; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 20346; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20347; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 20348; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20349; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20350; BITALG-NEXT: retq 20351 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 20352 %3 = icmp ugt <2 x i64> %2, <i64 10, i64 10> 20353 %4 = sext <2 x i1> %3 to <2 x i64> 20354 ret <2 x i64> %4 20355} 20356 20357define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { 20358; SSE2-LABEL: ult_11_v2i64: 20359; SSE2: # %bb.0: 20360; SSE2-NEXT: movdqa %xmm0, %xmm1 20361; SSE2-NEXT: psrlw $1, %xmm1 20362; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20363; SSE2-NEXT: psubb %xmm1, %xmm0 20364; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20365; SSE2-NEXT: movdqa %xmm0, %xmm2 20366; SSE2-NEXT: pand %xmm1, %xmm2 20367; SSE2-NEXT: psrlw $2, %xmm0 20368; SSE2-NEXT: pand %xmm1, %xmm0 20369; SSE2-NEXT: paddb %xmm2, %xmm0 20370; SSE2-NEXT: movdqa %xmm0, %xmm1 20371; SSE2-NEXT: psrlw $4, %xmm1 20372; SSE2-NEXT: paddb %xmm0, %xmm1 20373; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20374; SSE2-NEXT: pxor %xmm0, %xmm0 20375; SSE2-NEXT: psadbw %xmm1, %xmm0 20376; SSE2-NEXT: por {{.*}}(%rip), %xmm0 20377; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20378; SSE2-NEXT: movdqa %xmm1, %xmm2 20379; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 20380; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20381; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 20382; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20383; SSE2-NEXT: pand %xmm3, %xmm1 20384; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20385; SSE2-NEXT: por %xmm1, %xmm0 20386; SSE2-NEXT: retq 20387; 20388; SSE3-LABEL: ult_11_v2i64: 20389; SSE3: # %bb.0: 20390; SSE3-NEXT: movdqa %xmm0, %xmm1 20391; SSE3-NEXT: psrlw $1, %xmm1 20392; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20393; SSE3-NEXT: psubb %xmm1, %xmm0 20394; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20395; SSE3-NEXT: movdqa %xmm0, %xmm2 20396; SSE3-NEXT: pand %xmm1, %xmm2 20397; SSE3-NEXT: psrlw $2, %xmm0 20398; SSE3-NEXT: pand %xmm1, %xmm0 20399; SSE3-NEXT: paddb %xmm2, %xmm0 20400; SSE3-NEXT: movdqa %xmm0, %xmm1 20401; SSE3-NEXT: psrlw $4, %xmm1 20402; SSE3-NEXT: paddb %xmm0, %xmm1 20403; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20404; SSE3-NEXT: pxor %xmm0, %xmm0 20405; SSE3-NEXT: psadbw %xmm1, %xmm0 20406; SSE3-NEXT: por {{.*}}(%rip), %xmm0 20407; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20408; SSE3-NEXT: movdqa %xmm1, %xmm2 20409; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 20410; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20411; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 20412; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20413; SSE3-NEXT: pand %xmm3, %xmm1 20414; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20415; SSE3-NEXT: por %xmm1, %xmm0 20416; SSE3-NEXT: retq 20417; 20418; SSSE3-LABEL: ult_11_v2i64: 20419; SSSE3: # %bb.0: 20420; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20421; SSSE3-NEXT: movdqa %xmm0, %xmm2 20422; SSSE3-NEXT: pand %xmm1, %xmm2 20423; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20424; SSSE3-NEXT: movdqa %xmm3, %xmm4 20425; SSSE3-NEXT: pshufb %xmm2, %xmm4 20426; SSSE3-NEXT: psrlw $4, %xmm0 20427; SSSE3-NEXT: pand %xmm1, %xmm0 20428; SSSE3-NEXT: pshufb %xmm0, %xmm3 20429; SSSE3-NEXT: paddb %xmm4, %xmm3 20430; SSSE3-NEXT: pxor %xmm0, %xmm0 20431; SSSE3-NEXT: psadbw %xmm3, %xmm0 20432; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 20433; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20434; SSSE3-NEXT: movdqa %xmm1, %xmm2 20435; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 20436; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20437; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 20438; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20439; SSSE3-NEXT: pand %xmm3, %xmm1 20440; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20441; 
SSSE3-NEXT: por %xmm1, %xmm0 20442; SSSE3-NEXT: retq 20443; 20444; SSE41-LABEL: ult_11_v2i64: 20445; SSE41: # %bb.0: 20446; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20447; SSE41-NEXT: movdqa %xmm0, %xmm2 20448; SSE41-NEXT: pand %xmm1, %xmm2 20449; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20450; SSE41-NEXT: movdqa %xmm3, %xmm4 20451; SSE41-NEXT: pshufb %xmm2, %xmm4 20452; SSE41-NEXT: psrlw $4, %xmm0 20453; SSE41-NEXT: pand %xmm1, %xmm0 20454; SSE41-NEXT: pshufb %xmm0, %xmm3 20455; SSE41-NEXT: paddb %xmm4, %xmm3 20456; SSE41-NEXT: pxor %xmm0, %xmm0 20457; SSE41-NEXT: psadbw %xmm3, %xmm0 20458; SSE41-NEXT: por {{.*}}(%rip), %xmm0 20459; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20460; SSE41-NEXT: movdqa %xmm1, %xmm2 20461; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 20462; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20463; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 20464; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20465; SSE41-NEXT: pand %xmm3, %xmm1 20466; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20467; SSE41-NEXT: por %xmm1, %xmm0 20468; SSE41-NEXT: retq 20469; 20470; AVX1-LABEL: ult_11_v2i64: 20471; AVX1: # %bb.0: 20472; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20473; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 20474; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20475; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20476; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 20477; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 20478; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20479; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20480; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 20481; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20482; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] 20483; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20484; AVX1-NEXT: retq 20485; 20486; AVX2-LABEL: ult_11_v2i64: 20487; AVX2: # %bb.0: 20488; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20489; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 20490; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20491; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20492; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 20493; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 20494; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20495; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20496; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 20497; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20498; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] 20499; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20500; AVX2-NEXT: retq 20501; 20502; AVX512VPOPCNTDQ-LABEL: ult_11_v2i64: 20503; AVX512VPOPCNTDQ: # %bb.0: 20504; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20505; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 20506; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] 20507; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20508; AVX512VPOPCNTDQ-NEXT: vzeroupper 20509; AVX512VPOPCNTDQ-NEXT: retq 20510; 20511; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64: 20512; AVX512VPOPCNTDQVL: # %bb.0: 20513; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 20514; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 20515; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20516; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20517; AVX512VPOPCNTDQVL-NEXT: retq 20518; 20519; BITALG_NOVLX-LABEL: ult_11_v2i64: 20520; BITALG_NOVLX: # %bb.0: 20521; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20522; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 20523; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 20524; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20525; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] 20526; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20527; BITALG_NOVLX-NEXT: vzeroupper 20528; BITALG_NOVLX-NEXT: retq 20529; 20530; BITALG-LABEL: ult_11_v2i64: 20531; BITALG: # %bb.0: 20532; BITALG-NEXT: vpopcntb %xmm0, %xmm0 20533; BITALG-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 20534; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20535; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 20536; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20537; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20538; BITALG-NEXT: retq 20539 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 20540 %3 = icmp ult <2 x i64> %2, <i64 11, i64 11> 20541 %4 = sext <2 x i1> %3 to <2 x i64> 20542 ret <2 x i64> %4 20543} 20544 20545define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { 20546; SSE2-LABEL: ugt_11_v2i64: 20547; SSE2: # %bb.0: 20548; SSE2-NEXT: movdqa %xmm0, %xmm1 20549; SSE2-NEXT: psrlw $1, %xmm1 20550; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20551; SSE2-NEXT: psubb %xmm1, %xmm0 20552; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20553; SSE2-NEXT: movdqa %xmm0, %xmm2 20554; SSE2-NEXT: pand %xmm1, %xmm2 20555; SSE2-NEXT: psrlw $2, %xmm0 20556; SSE2-NEXT: pand %xmm1, %xmm0 20557; SSE2-NEXT: paddb %xmm2, %xmm0 20558; SSE2-NEXT: movdqa %xmm0, %xmm1 20559; SSE2-NEXT: psrlw $4, %xmm1 20560; SSE2-NEXT: paddb %xmm0, %xmm1 20561; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20562; SSE2-NEXT: pxor %xmm0, %xmm0 20563; SSE2-NEXT: psadbw %xmm1, %xmm0 20564; SSE2-NEXT: por {{.*}}(%rip), %xmm0 20565; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20566; SSE2-NEXT: movdqa %xmm0, %xmm2 20567; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 20568; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20569; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 20570; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20571; SSE2-NEXT: pand %xmm3, %xmm1 20572; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20573; SSE2-NEXT: por %xmm1, %xmm0 20574; SSE2-NEXT: retq 20575; 20576; SSE3-LABEL: ugt_11_v2i64: 20577; SSE3: # %bb.0: 20578; SSE3-NEXT: movdqa %xmm0, %xmm1 20579; SSE3-NEXT: psrlw $1, %xmm1 20580; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20581; SSE3-NEXT: psubb %xmm1, %xmm0 20582; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20583; SSE3-NEXT: movdqa 
%xmm0, %xmm2 20584; SSE3-NEXT: pand %xmm1, %xmm2 20585; SSE3-NEXT: psrlw $2, %xmm0 20586; SSE3-NEXT: pand %xmm1, %xmm0 20587; SSE3-NEXT: paddb %xmm2, %xmm0 20588; SSE3-NEXT: movdqa %xmm0, %xmm1 20589; SSE3-NEXT: psrlw $4, %xmm1 20590; SSE3-NEXT: paddb %xmm0, %xmm1 20591; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20592; SSE3-NEXT: pxor %xmm0, %xmm0 20593; SSE3-NEXT: psadbw %xmm1, %xmm0 20594; SSE3-NEXT: por {{.*}}(%rip), %xmm0 20595; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20596; SSE3-NEXT: movdqa %xmm0, %xmm2 20597; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 20598; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20599; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 20600; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20601; SSE3-NEXT: pand %xmm3, %xmm1 20602; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20603; SSE3-NEXT: por %xmm1, %xmm0 20604; SSE3-NEXT: retq 20605; 20606; SSSE3-LABEL: ugt_11_v2i64: 20607; SSSE3: # %bb.0: 20608; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20609; SSSE3-NEXT: movdqa %xmm0, %xmm2 20610; SSSE3-NEXT: pand %xmm1, %xmm2 20611; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20612; SSSE3-NEXT: movdqa %xmm3, %xmm4 20613; SSSE3-NEXT: pshufb %xmm2, %xmm4 20614; SSSE3-NEXT: psrlw $4, %xmm0 20615; SSSE3-NEXT: pand %xmm1, %xmm0 20616; SSSE3-NEXT: pshufb %xmm0, %xmm3 20617; SSSE3-NEXT: paddb %xmm4, %xmm3 20618; SSSE3-NEXT: pxor %xmm0, %xmm0 20619; SSSE3-NEXT: psadbw %xmm3, %xmm0 20620; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 20621; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20622; SSSE3-NEXT: movdqa %xmm0, %xmm2 20623; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 20624; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20625; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 20626; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20627; SSSE3-NEXT: pand %xmm3, %xmm1 20628; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20629; SSSE3-NEXT: por %xmm1, %xmm0 20630; SSSE3-NEXT: retq 20631; 20632; SSE41-LABEL: 
ugt_11_v2i64: 20633; SSE41: # %bb.0: 20634; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20635; SSE41-NEXT: movdqa %xmm0, %xmm2 20636; SSE41-NEXT: pand %xmm1, %xmm2 20637; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20638; SSE41-NEXT: movdqa %xmm3, %xmm4 20639; SSE41-NEXT: pshufb %xmm2, %xmm4 20640; SSE41-NEXT: psrlw $4, %xmm0 20641; SSE41-NEXT: pand %xmm1, %xmm0 20642; SSE41-NEXT: pshufb %xmm0, %xmm3 20643; SSE41-NEXT: paddb %xmm4, %xmm3 20644; SSE41-NEXT: pxor %xmm0, %xmm0 20645; SSE41-NEXT: psadbw %xmm3, %xmm0 20646; SSE41-NEXT: por {{.*}}(%rip), %xmm0 20647; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] 20648; SSE41-NEXT: movdqa %xmm0, %xmm2 20649; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 20650; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20651; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 20652; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20653; SSE41-NEXT: pand %xmm3, %xmm1 20654; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20655; SSE41-NEXT: por %xmm1, %xmm0 20656; SSE41-NEXT: retq 20657; 20658; AVX1-LABEL: ugt_11_v2i64: 20659; AVX1: # %bb.0: 20660; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20661; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 20662; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20663; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20664; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 20665; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 20666; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20667; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20668; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 20669; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20670; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20671; AVX1-NEXT: retq 20672; 20673; AVX2-LABEL: ugt_11_v2i64: 20674; AVX2: # %bb.0: 20675; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20676; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 20677; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20678; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20679; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 20680; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 20681; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20682; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20683; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 20684; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20685; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20686; AVX2-NEXT: retq 20687; 20688; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64: 20689; AVX512VPOPCNTDQ: # %bb.0: 20690; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20691; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 20692; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20693; AVX512VPOPCNTDQ-NEXT: vzeroupper 20694; AVX512VPOPCNTDQ-NEXT: retq 20695; 20696; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64: 20697; AVX512VPOPCNTDQVL: # %bb.0: 20698; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 20699; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 20700; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20701; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20702; AVX512VPOPCNTDQVL-NEXT: retq 20703; 20704; BITALG_NOVLX-LABEL: ugt_11_v2i64: 20705; BITALG_NOVLX: # %bb.0: 20706; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20707; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 20708; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 20709; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20710; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 20711; BITALG_NOVLX-NEXT: vzeroupper 20712; BITALG_NOVLX-NEXT: retq 20713; 20714; BITALG-LABEL: ugt_11_v2i64: 20715; BITALG: # %bb.0: 20716; BITALG-NEXT: vpopcntb %xmm0, %xmm0 20717; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 20718; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20719; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 20720; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20721; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20722; BITALG-NEXT: retq 20723 %2 = tail call <2 x i64> 
@llvm.ctpop.v2i64(<2 x i64> %0) 20724 %3 = icmp ugt <2 x i64> %2, <i64 11, i64 11> 20725 %4 = sext <2 x i1> %3 to <2 x i64> 20726 ret <2 x i64> %4 20727} 20728 20729define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { 20730; SSE2-LABEL: ult_12_v2i64: 20731; SSE2: # %bb.0: 20732; SSE2-NEXT: movdqa %xmm0, %xmm1 20733; SSE2-NEXT: psrlw $1, %xmm1 20734; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20735; SSE2-NEXT: psubb %xmm1, %xmm0 20736; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20737; SSE2-NEXT: movdqa %xmm0, %xmm2 20738; SSE2-NEXT: pand %xmm1, %xmm2 20739; SSE2-NEXT: psrlw $2, %xmm0 20740; SSE2-NEXT: pand %xmm1, %xmm0 20741; SSE2-NEXT: paddb %xmm2, %xmm0 20742; SSE2-NEXT: movdqa %xmm0, %xmm1 20743; SSE2-NEXT: psrlw $4, %xmm1 20744; SSE2-NEXT: paddb %xmm0, %xmm1 20745; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20746; SSE2-NEXT: pxor %xmm0, %xmm0 20747; SSE2-NEXT: psadbw %xmm1, %xmm0 20748; SSE2-NEXT: por {{.*}}(%rip), %xmm0 20749; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 20750; SSE2-NEXT: movdqa %xmm1, %xmm2 20751; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 20752; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20753; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 20754; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20755; SSE2-NEXT: pand %xmm3, %xmm1 20756; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20757; SSE2-NEXT: por %xmm1, %xmm0 20758; SSE2-NEXT: retq 20759; 20760; SSE3-LABEL: ult_12_v2i64: 20761; SSE3: # %bb.0: 20762; SSE3-NEXT: movdqa %xmm0, %xmm1 20763; SSE3-NEXT: psrlw $1, %xmm1 20764; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20765; SSE3-NEXT: psubb %xmm1, %xmm0 20766; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20767; SSE3-NEXT: movdqa %xmm0, %xmm2 20768; SSE3-NEXT: pand %xmm1, %xmm2 20769; SSE3-NEXT: psrlw $2, %xmm0 20770; SSE3-NEXT: pand %xmm1, %xmm0 20771; SSE3-NEXT: paddb %xmm2, %xmm0 20772; SSE3-NEXT: movdqa %xmm0, %xmm1 20773; SSE3-NEXT: psrlw $4, %xmm1 20774; SSE3-NEXT: paddb %xmm0, %xmm1 
20775; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20776; SSE3-NEXT: pxor %xmm0, %xmm0 20777; SSE3-NEXT: psadbw %xmm1, %xmm0 20778; SSE3-NEXT: por {{.*}}(%rip), %xmm0 20779; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 20780; SSE3-NEXT: movdqa %xmm1, %xmm2 20781; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 20782; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20783; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 20784; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20785; SSE3-NEXT: pand %xmm3, %xmm1 20786; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20787; SSE3-NEXT: por %xmm1, %xmm0 20788; SSE3-NEXT: retq 20789; 20790; SSSE3-LABEL: ult_12_v2i64: 20791; SSSE3: # %bb.0: 20792; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20793; SSSE3-NEXT: movdqa %xmm0, %xmm2 20794; SSSE3-NEXT: pand %xmm1, %xmm2 20795; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20796; SSSE3-NEXT: movdqa %xmm3, %xmm4 20797; SSSE3-NEXT: pshufb %xmm2, %xmm4 20798; SSSE3-NEXT: psrlw $4, %xmm0 20799; SSSE3-NEXT: pand %xmm1, %xmm0 20800; SSSE3-NEXT: pshufb %xmm0, %xmm3 20801; SSSE3-NEXT: paddb %xmm4, %xmm3 20802; SSSE3-NEXT: pxor %xmm0, %xmm0 20803; SSSE3-NEXT: psadbw %xmm3, %xmm0 20804; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 20805; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 20806; SSSE3-NEXT: movdqa %xmm1, %xmm2 20807; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 20808; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20809; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 20810; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20811; SSSE3-NEXT: pand %xmm3, %xmm1 20812; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20813; SSSE3-NEXT: por %xmm1, %xmm0 20814; SSSE3-NEXT: retq 20815; 20816; SSE41-LABEL: ult_12_v2i64: 20817; SSE41: # %bb.0: 20818; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20819; SSE41-NEXT: movdqa %xmm0, %xmm2 20820; SSE41-NEXT: pand %xmm1, %xmm2 20821; SSE41-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20822; SSE41-NEXT: movdqa %xmm3, %xmm4 20823; SSE41-NEXT: pshufb %xmm2, %xmm4 20824; SSE41-NEXT: psrlw $4, %xmm0 20825; SSE41-NEXT: pand %xmm1, %xmm0 20826; SSE41-NEXT: pshufb %xmm0, %xmm3 20827; SSE41-NEXT: paddb %xmm4, %xmm3 20828; SSE41-NEXT: pxor %xmm0, %xmm0 20829; SSE41-NEXT: psadbw %xmm3, %xmm0 20830; SSE41-NEXT: por {{.*}}(%rip), %xmm0 20831; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 20832; SSE41-NEXT: movdqa %xmm1, %xmm2 20833; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 20834; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20835; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 20836; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20837; SSE41-NEXT: pand %xmm3, %xmm1 20838; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20839; SSE41-NEXT: por %xmm1, %xmm0 20840; SSE41-NEXT: retq 20841; 20842; AVX1-LABEL: ult_12_v2i64: 20843; AVX1: # %bb.0: 20844; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20845; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 20846; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20847; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20848; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 20849; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 20850; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20851; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20852; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 20853; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20854; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] 20855; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20856; AVX1-NEXT: retq 20857; 20858; AVX2-LABEL: ult_12_v2i64: 20859; AVX2: # %bb.0: 20860; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20861; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 20862; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20863; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 20864; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 20865; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 20866; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 20867; 
AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 20868; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 20869; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20870; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] 20871; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20872; AVX2-NEXT: retq 20873; 20874; AVX512VPOPCNTDQ-LABEL: ult_12_v2i64: 20875; AVX512VPOPCNTDQ: # %bb.0: 20876; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20877; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 20878; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] 20879; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20880; AVX512VPOPCNTDQ-NEXT: vzeroupper 20881; AVX512VPOPCNTDQ-NEXT: retq 20882; 20883; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64: 20884; AVX512VPOPCNTDQVL: # %bb.0: 20885; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 20886; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 20887; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20888; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20889; AVX512VPOPCNTDQVL-NEXT: retq 20890; 20891; BITALG_NOVLX-LABEL: ult_12_v2i64: 20892; BITALG_NOVLX: # %bb.0: 20893; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20894; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 20895; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 20896; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20897; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] 20898; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 20899; BITALG_NOVLX-NEXT: vzeroupper 20900; BITALG_NOVLX-NEXT: retq 20901; 20902; BITALG-LABEL: ult_12_v2i64: 20903; BITALG: # %bb.0: 20904; BITALG-NEXT: vpopcntb %xmm0, %xmm0 20905; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 20906; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 20907; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 20908; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 20909; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 20910; BITALG-NEXT: retq 20911 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 20912 %3 = icmp ult <2 x i64> %2, <i64 12, i64 12> 20913 %4 
= sext <2 x i1> %3 to <2 x i64> 20914 ret <2 x i64> %4 20915} 20916 20917define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { 20918; SSE2-LABEL: ugt_12_v2i64: 20919; SSE2: # %bb.0: 20920; SSE2-NEXT: movdqa %xmm0, %xmm1 20921; SSE2-NEXT: psrlw $1, %xmm1 20922; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20923; SSE2-NEXT: psubb %xmm1, %xmm0 20924; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20925; SSE2-NEXT: movdqa %xmm0, %xmm2 20926; SSE2-NEXT: pand %xmm1, %xmm2 20927; SSE2-NEXT: psrlw $2, %xmm0 20928; SSE2-NEXT: pand %xmm1, %xmm0 20929; SSE2-NEXT: paddb %xmm2, %xmm0 20930; SSE2-NEXT: movdqa %xmm0, %xmm1 20931; SSE2-NEXT: psrlw $4, %xmm1 20932; SSE2-NEXT: paddb %xmm0, %xmm1 20933; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 20934; SSE2-NEXT: pxor %xmm0, %xmm0 20935; SSE2-NEXT: psadbw %xmm1, %xmm0 20936; SSE2-NEXT: por {{.*}}(%rip), %xmm0 20937; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 20938; SSE2-NEXT: movdqa %xmm0, %xmm2 20939; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 20940; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20941; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 20942; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20943; SSE2-NEXT: pand %xmm3, %xmm1 20944; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20945; SSE2-NEXT: por %xmm1, %xmm0 20946; SSE2-NEXT: retq 20947; 20948; SSE3-LABEL: ugt_12_v2i64: 20949; SSE3: # %bb.0: 20950; SSE3-NEXT: movdqa %xmm0, %xmm1 20951; SSE3-NEXT: psrlw $1, %xmm1 20952; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20953; SSE3-NEXT: psubb %xmm1, %xmm0 20954; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 20955; SSE3-NEXT: movdqa %xmm0, %xmm2 20956; SSE3-NEXT: pand %xmm1, %xmm2 20957; SSE3-NEXT: psrlw $2, %xmm0 20958; SSE3-NEXT: pand %xmm1, %xmm0 20959; SSE3-NEXT: paddb %xmm2, %xmm0 20960; SSE3-NEXT: movdqa %xmm0, %xmm1 20961; SSE3-NEXT: psrlw $4, %xmm1 20962; SSE3-NEXT: paddb %xmm0, %xmm1 20963; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 20964; SSE3-NEXT: pxor %xmm0, %xmm0 20965; SSE3-NEXT: 
psadbw %xmm1, %xmm0 20966; SSE3-NEXT: por {{.*}}(%rip), %xmm0 20967; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 20968; SSE3-NEXT: movdqa %xmm0, %xmm2 20969; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 20970; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20971; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 20972; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20973; SSE3-NEXT: pand %xmm3, %xmm1 20974; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 20975; SSE3-NEXT: por %xmm1, %xmm0 20976; SSE3-NEXT: retq 20977; 20978; SSSE3-LABEL: ugt_12_v2i64: 20979; SSSE3: # %bb.0: 20980; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 20981; SSSE3-NEXT: movdqa %xmm0, %xmm2 20982; SSSE3-NEXT: pand %xmm1, %xmm2 20983; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 20984; SSSE3-NEXT: movdqa %xmm3, %xmm4 20985; SSSE3-NEXT: pshufb %xmm2, %xmm4 20986; SSSE3-NEXT: psrlw $4, %xmm0 20987; SSSE3-NEXT: pand %xmm1, %xmm0 20988; SSSE3-NEXT: pshufb %xmm0, %xmm3 20989; SSSE3-NEXT: paddb %xmm4, %xmm3 20990; SSSE3-NEXT: pxor %xmm0, %xmm0 20991; SSSE3-NEXT: psadbw %xmm3, %xmm0 20992; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 20993; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 20994; SSSE3-NEXT: movdqa %xmm0, %xmm2 20995; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 20996; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 20997; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 20998; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 20999; SSSE3-NEXT: pand %xmm3, %xmm1 21000; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21001; SSSE3-NEXT: por %xmm1, %xmm0 21002; SSSE3-NEXT: retq 21003; 21004; SSE41-LABEL: ugt_12_v2i64: 21005; SSE41: # %bb.0: 21006; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21007; SSE41-NEXT: movdqa %xmm0, %xmm2 21008; SSE41-NEXT: pand %xmm1, %xmm2 21009; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21010; SSE41-NEXT: movdqa %xmm3, %xmm4 21011; SSE41-NEXT: pshufb %xmm2, %xmm4 21012; 
SSE41-NEXT: psrlw $4, %xmm0 21013; SSE41-NEXT: pand %xmm1, %xmm0 21014; SSE41-NEXT: pshufb %xmm0, %xmm3 21015; SSE41-NEXT: paddb %xmm4, %xmm3 21016; SSE41-NEXT: pxor %xmm0, %xmm0 21017; SSE41-NEXT: psadbw %xmm3, %xmm0 21018; SSE41-NEXT: por {{.*}}(%rip), %xmm0 21019; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] 21020; SSE41-NEXT: movdqa %xmm0, %xmm2 21021; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 21022; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21023; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 21024; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21025; SSE41-NEXT: pand %xmm3, %xmm1 21026; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21027; SSE41-NEXT: por %xmm1, %xmm0 21028; SSE41-NEXT: retq 21029; 21030; AVX1-LABEL: ugt_12_v2i64: 21031; AVX1: # %bb.0: 21032; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21033; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 21034; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21035; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21036; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 21037; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 21038; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21039; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21040; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 21041; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21042; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21043; AVX1-NEXT: retq 21044; 21045; AVX2-LABEL: ugt_12_v2i64: 21046; AVX2: # %bb.0: 21047; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21048; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 21049; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21050; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21051; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 21052; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 21053; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21054; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21055; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 21056; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21057; AVX2-NEXT: vpcmpgtq 
{{.*}}(%rip), %xmm0, %xmm0 21058; AVX2-NEXT: retq 21059; 21060; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64: 21061; AVX512VPOPCNTDQ: # %bb.0: 21062; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21063; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 21064; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21065; AVX512VPOPCNTDQ-NEXT: vzeroupper 21066; AVX512VPOPCNTDQ-NEXT: retq 21067; 21068; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64: 21069; AVX512VPOPCNTDQVL: # %bb.0: 21070; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 21071; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 21072; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21073; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21074; AVX512VPOPCNTDQVL-NEXT: retq 21075; 21076; BITALG_NOVLX-LABEL: ugt_12_v2i64: 21077; BITALG_NOVLX: # %bb.0: 21078; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21079; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 21080; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 21081; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21082; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21083; BITALG_NOVLX-NEXT: vzeroupper 21084; BITALG_NOVLX-NEXT: retq 21085; 21086; BITALG-LABEL: ugt_12_v2i64: 21087; BITALG: # %bb.0: 21088; BITALG-NEXT: vpopcntb %xmm0, %xmm0 21089; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 21090; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21091; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 21092; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21093; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21094; BITALG-NEXT: retq 21095 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 21096 %3 = icmp ugt <2 x i64> %2, <i64 12, i64 12> 21097 %4 = sext <2 x i1> %3 to <2 x i64> 21098 ret <2 x i64> %4 21099} 21100 21101define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { 21102; SSE2-LABEL: ult_13_v2i64: 21103; SSE2: # %bb.0: 21104; SSE2-NEXT: movdqa %xmm0, %xmm1 21105; SSE2-NEXT: psrlw $1, %xmm1 21106; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21107; 
SSE2-NEXT: psubb %xmm1, %xmm0 21108; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21109; SSE2-NEXT: movdqa %xmm0, %xmm2 21110; SSE2-NEXT: pand %xmm1, %xmm2 21111; SSE2-NEXT: psrlw $2, %xmm0 21112; SSE2-NEXT: pand %xmm1, %xmm0 21113; SSE2-NEXT: paddb %xmm2, %xmm0 21114; SSE2-NEXT: movdqa %xmm0, %xmm1 21115; SSE2-NEXT: psrlw $4, %xmm1 21116; SSE2-NEXT: paddb %xmm0, %xmm1 21117; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21118; SSE2-NEXT: pxor %xmm0, %xmm0 21119; SSE2-NEXT: psadbw %xmm1, %xmm0 21120; SSE2-NEXT: por {{.*}}(%rip), %xmm0 21121; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] 21122; SSE2-NEXT: movdqa %xmm1, %xmm2 21123; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 21124; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21125; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 21126; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21127; SSE2-NEXT: pand %xmm3, %xmm1 21128; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21129; SSE2-NEXT: por %xmm1, %xmm0 21130; SSE2-NEXT: retq 21131; 21132; SSE3-LABEL: ult_13_v2i64: 21133; SSE3: # %bb.0: 21134; SSE3-NEXT: movdqa %xmm0, %xmm1 21135; SSE3-NEXT: psrlw $1, %xmm1 21136; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21137; SSE3-NEXT: psubb %xmm1, %xmm0 21138; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21139; SSE3-NEXT: movdqa %xmm0, %xmm2 21140; SSE3-NEXT: pand %xmm1, %xmm2 21141; SSE3-NEXT: psrlw $2, %xmm0 21142; SSE3-NEXT: pand %xmm1, %xmm0 21143; SSE3-NEXT: paddb %xmm2, %xmm0 21144; SSE3-NEXT: movdqa %xmm0, %xmm1 21145; SSE3-NEXT: psrlw $4, %xmm1 21146; SSE3-NEXT: paddb %xmm0, %xmm1 21147; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21148; SSE3-NEXT: pxor %xmm0, %xmm0 21149; SSE3-NEXT: psadbw %xmm1, %xmm0 21150; SSE3-NEXT: por {{.*}}(%rip), %xmm0 21151; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] 21152; SSE3-NEXT: movdqa %xmm1, %xmm2 21153; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 21154; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21155; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 
21156; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21157; SSE3-NEXT: pand %xmm3, %xmm1 21158; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21159; SSE3-NEXT: por %xmm1, %xmm0 21160; SSE3-NEXT: retq 21161; 21162; SSSE3-LABEL: ult_13_v2i64: 21163; SSSE3: # %bb.0: 21164; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21165; SSSE3-NEXT: movdqa %xmm0, %xmm2 21166; SSSE3-NEXT: pand %xmm1, %xmm2 21167; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21168; SSSE3-NEXT: movdqa %xmm3, %xmm4 21169; SSSE3-NEXT: pshufb %xmm2, %xmm4 21170; SSSE3-NEXT: psrlw $4, %xmm0 21171; SSSE3-NEXT: pand %xmm1, %xmm0 21172; SSSE3-NEXT: pshufb %xmm0, %xmm3 21173; SSSE3-NEXT: paddb %xmm4, %xmm3 21174; SSSE3-NEXT: pxor %xmm0, %xmm0 21175; SSSE3-NEXT: psadbw %xmm3, %xmm0 21176; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 21177; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] 21178; SSSE3-NEXT: movdqa %xmm1, %xmm2 21179; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 21180; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21181; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 21182; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21183; SSSE3-NEXT: pand %xmm3, %xmm1 21184; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21185; SSSE3-NEXT: por %xmm1, %xmm0 21186; SSSE3-NEXT: retq 21187; 21188; SSE41-LABEL: ult_13_v2i64: 21189; SSE41: # %bb.0: 21190; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21191; SSE41-NEXT: movdqa %xmm0, %xmm2 21192; SSE41-NEXT: pand %xmm1, %xmm2 21193; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21194; SSE41-NEXT: movdqa %xmm3, %xmm4 21195; SSE41-NEXT: pshufb %xmm2, %xmm4 21196; SSE41-NEXT: psrlw $4, %xmm0 21197; SSE41-NEXT: pand %xmm1, %xmm0 21198; SSE41-NEXT: pshufb %xmm0, %xmm3 21199; SSE41-NEXT: paddb %xmm4, %xmm3 21200; SSE41-NEXT: pxor %xmm0, %xmm0 21201; SSE41-NEXT: psadbw %xmm3, %xmm0 21202; SSE41-NEXT: por {{.*}}(%rip), %xmm0 21203; SSE41-NEXT: movdqa {{.*#+}} 
xmm1 = [2147483661,2147483661] 21204; SSE41-NEXT: movdqa %xmm1, %xmm2 21205; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 21206; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21207; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 21208; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21209; SSE41-NEXT: pand %xmm3, %xmm1 21210; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21211; SSE41-NEXT: por %xmm1, %xmm0 21212; SSE41-NEXT: retq 21213; 21214; AVX1-LABEL: ult_13_v2i64: 21215; AVX1: # %bb.0: 21216; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21217; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 21218; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21219; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21220; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 21221; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 21222; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21223; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21224; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 21225; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21226; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] 21227; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21228; AVX1-NEXT: retq 21229; 21230; AVX2-LABEL: ult_13_v2i64: 21231; AVX2: # %bb.0: 21232; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21233; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 21234; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21235; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21236; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 21237; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 21238; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21239; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21240; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 21241; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21242; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] 21243; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21244; AVX2-NEXT: retq 21245; 21246; AVX512VPOPCNTDQ-LABEL: ult_13_v2i64: 21247; AVX512VPOPCNTDQ: # %bb.0: 21248; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21249; 
AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 21250; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] 21251; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21252; AVX512VPOPCNTDQ-NEXT: vzeroupper 21253; AVX512VPOPCNTDQ-NEXT: retq 21254; 21255; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64: 21256; AVX512VPOPCNTDQVL: # %bb.0: 21257; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 21258; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 21259; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21260; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21261; AVX512VPOPCNTDQVL-NEXT: retq 21262; 21263; BITALG_NOVLX-LABEL: ult_13_v2i64: 21264; BITALG_NOVLX: # %bb.0: 21265; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21266; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 21267; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 21268; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21269; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] 21270; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21271; BITALG_NOVLX-NEXT: vzeroupper 21272; BITALG_NOVLX-NEXT: retq 21273; 21274; BITALG-LABEL: ult_13_v2i64: 21275; BITALG: # %bb.0: 21276; BITALG-NEXT: vpopcntb %xmm0, %xmm0 21277; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 21278; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21279; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 21280; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21281; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21282; BITALG-NEXT: retq 21283 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 21284 %3 = icmp ult <2 x i64> %2, <i64 13, i64 13> 21285 %4 = sext <2 x i1> %3 to <2 x i64> 21286 ret <2 x i64> %4 21287} 21288 21289define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { 21290; SSE2-LABEL: ugt_13_v2i64: 21291; SSE2: # %bb.0: 21292; SSE2-NEXT: movdqa %xmm0, %xmm1 21293; SSE2-NEXT: psrlw $1, %xmm1 21294; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21295; SSE2-NEXT: psubb %xmm1, %xmm0 21296; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21297; SSE2-NEXT: movdqa %xmm0, %xmm2 21298; SSE2-NEXT: pand %xmm1, %xmm2 21299; SSE2-NEXT: psrlw $2, %xmm0 21300; SSE2-NEXT: pand %xmm1, %xmm0 21301; SSE2-NEXT: paddb %xmm2, %xmm0 21302; SSE2-NEXT: movdqa %xmm0, %xmm1 21303; SSE2-NEXT: psrlw $4, %xmm1 21304; SSE2-NEXT: paddb %xmm0, %xmm1 21305; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21306; SSE2-NEXT: pxor %xmm0, %xmm0 21307; SSE2-NEXT: psadbw %xmm1, %xmm0 21308; SSE2-NEXT: por {{.*}}(%rip), %xmm0 21309; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] 21310; SSE2-NEXT: movdqa %xmm0, %xmm2 21311; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 21312; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21313; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 21314; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21315; SSE2-NEXT: pand %xmm3, %xmm1 21316; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21317; SSE2-NEXT: por %xmm1, %xmm0 21318; SSE2-NEXT: retq 21319; 21320; SSE3-LABEL: ugt_13_v2i64: 21321; SSE3: # %bb.0: 21322; SSE3-NEXT: movdqa %xmm0, %xmm1 21323; SSE3-NEXT: psrlw $1, %xmm1 21324; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21325; SSE3-NEXT: psubb %xmm1, %xmm0 21326; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21327; SSE3-NEXT: movdqa %xmm0, %xmm2 21328; SSE3-NEXT: pand %xmm1, %xmm2 21329; SSE3-NEXT: psrlw $2, %xmm0 21330; SSE3-NEXT: pand %xmm1, %xmm0 21331; SSE3-NEXT: paddb %xmm2, %xmm0 21332; SSE3-NEXT: movdqa %xmm0, %xmm1 21333; SSE3-NEXT: psrlw $4, %xmm1 21334; SSE3-NEXT: paddb %xmm0, %xmm1 21335; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21336; SSE3-NEXT: pxor %xmm0, %xmm0 21337; SSE3-NEXT: psadbw %xmm1, %xmm0 21338; SSE3-NEXT: por {{.*}}(%rip), %xmm0 21339; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] 21340; SSE3-NEXT: movdqa %xmm0, %xmm2 21341; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 21342; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21343; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 21344; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21345; 
SSE3-NEXT: pand %xmm3, %xmm1 21346; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21347; SSE3-NEXT: por %xmm1, %xmm0 21348; SSE3-NEXT: retq 21349; 21350; SSSE3-LABEL: ugt_13_v2i64: 21351; SSSE3: # %bb.0: 21352; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21353; SSSE3-NEXT: movdqa %xmm0, %xmm2 21354; SSSE3-NEXT: pand %xmm1, %xmm2 21355; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21356; SSSE3-NEXT: movdqa %xmm3, %xmm4 21357; SSSE3-NEXT: pshufb %xmm2, %xmm4 21358; SSSE3-NEXT: psrlw $4, %xmm0 21359; SSSE3-NEXT: pand %xmm1, %xmm0 21360; SSSE3-NEXT: pshufb %xmm0, %xmm3 21361; SSSE3-NEXT: paddb %xmm4, %xmm3 21362; SSSE3-NEXT: pxor %xmm0, %xmm0 21363; SSSE3-NEXT: psadbw %xmm3, %xmm0 21364; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 21365; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] 21366; SSSE3-NEXT: movdqa %xmm0, %xmm2 21367; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 21368; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21369; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 21370; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21371; SSSE3-NEXT: pand %xmm3, %xmm1 21372; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21373; SSSE3-NEXT: por %xmm1, %xmm0 21374; SSSE3-NEXT: retq 21375; 21376; SSE41-LABEL: ugt_13_v2i64: 21377; SSE41: # %bb.0: 21378; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21379; SSE41-NEXT: movdqa %xmm0, %xmm2 21380; SSE41-NEXT: pand %xmm1, %xmm2 21381; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21382; SSE41-NEXT: movdqa %xmm3, %xmm4 21383; SSE41-NEXT: pshufb %xmm2, %xmm4 21384; SSE41-NEXT: psrlw $4, %xmm0 21385; SSE41-NEXT: pand %xmm1, %xmm0 21386; SSE41-NEXT: pshufb %xmm0, %xmm3 21387; SSE41-NEXT: paddb %xmm4, %xmm3 21388; SSE41-NEXT: pxor %xmm0, %xmm0 21389; SSE41-NEXT: psadbw %xmm3, %xmm0 21390; SSE41-NEXT: por {{.*}}(%rip), %xmm0 21391; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] 21392; SSE41-NEXT: movdqa %xmm0, 
%xmm2 21393; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 21394; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21395; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 21396; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21397; SSE41-NEXT: pand %xmm3, %xmm1 21398; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21399; SSE41-NEXT: por %xmm1, %xmm0 21400; SSE41-NEXT: retq 21401; 21402; AVX1-LABEL: ugt_13_v2i64: 21403; AVX1: # %bb.0: 21404; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21405; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 21406; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21407; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21408; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 21409; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 21410; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21411; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21412; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 21413; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21414; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21415; AVX1-NEXT: retq 21416; 21417; AVX2-LABEL: ugt_13_v2i64: 21418; AVX2: # %bb.0: 21419; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21420; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 21421; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21422; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21423; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 21424; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 21425; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21426; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21427; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 21428; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21429; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21430; AVX2-NEXT: retq 21431; 21432; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64: 21433; AVX512VPOPCNTDQ: # %bb.0: 21434; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21435; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 21436; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21437; AVX512VPOPCNTDQ-NEXT: vzeroupper 21438; 
AVX512VPOPCNTDQ-NEXT: retq 21439; 21440; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64: 21441; AVX512VPOPCNTDQVL: # %bb.0: 21442; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 21443; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 21444; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21445; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21446; AVX512VPOPCNTDQVL-NEXT: retq 21447; 21448; BITALG_NOVLX-LABEL: ugt_13_v2i64: 21449; BITALG_NOVLX: # %bb.0: 21450; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21451; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 21452; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 21453; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21454; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21455; BITALG_NOVLX-NEXT: vzeroupper 21456; BITALG_NOVLX-NEXT: retq 21457; 21458; BITALG-LABEL: ugt_13_v2i64: 21459; BITALG: # %bb.0: 21460; BITALG-NEXT: vpopcntb %xmm0, %xmm0 21461; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 21462; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21463; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 21464; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21465; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21466; BITALG-NEXT: retq 21467 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 21468 %3 = icmp ugt <2 x i64> %2, <i64 13, i64 13> 21469 %4 = sext <2 x i1> %3 to <2 x i64> 21470 ret <2 x i64> %4 21471} 21472 21473define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { 21474; SSE2-LABEL: ult_14_v2i64: 21475; SSE2: # %bb.0: 21476; SSE2-NEXT: movdqa %xmm0, %xmm1 21477; SSE2-NEXT: psrlw $1, %xmm1 21478; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21479; SSE2-NEXT: psubb %xmm1, %xmm0 21480; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21481; SSE2-NEXT: movdqa %xmm0, %xmm2 21482; SSE2-NEXT: pand %xmm1, %xmm2 21483; SSE2-NEXT: psrlw $2, %xmm0 21484; SSE2-NEXT: pand %xmm1, %xmm0 21485; SSE2-NEXT: paddb %xmm2, %xmm0 21486; SSE2-NEXT: movdqa %xmm0, %xmm1 21487; SSE2-NEXT: psrlw $4, 
%xmm1 21488; SSE2-NEXT: paddb %xmm0, %xmm1 21489; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21490; SSE2-NEXT: pxor %xmm0, %xmm0 21491; SSE2-NEXT: psadbw %xmm1, %xmm0 21492; SSE2-NEXT: por {{.*}}(%rip), %xmm0 21493; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21494; SSE2-NEXT: movdqa %xmm1, %xmm2 21495; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 21496; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21497; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 21498; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21499; SSE2-NEXT: pand %xmm3, %xmm1 21500; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21501; SSE2-NEXT: por %xmm1, %xmm0 21502; SSE2-NEXT: retq 21503; 21504; SSE3-LABEL: ult_14_v2i64: 21505; SSE3: # %bb.0: 21506; SSE3-NEXT: movdqa %xmm0, %xmm1 21507; SSE3-NEXT: psrlw $1, %xmm1 21508; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21509; SSE3-NEXT: psubb %xmm1, %xmm0 21510; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21511; SSE3-NEXT: movdqa %xmm0, %xmm2 21512; SSE3-NEXT: pand %xmm1, %xmm2 21513; SSE3-NEXT: psrlw $2, %xmm0 21514; SSE3-NEXT: pand %xmm1, %xmm0 21515; SSE3-NEXT: paddb %xmm2, %xmm0 21516; SSE3-NEXT: movdqa %xmm0, %xmm1 21517; SSE3-NEXT: psrlw $4, %xmm1 21518; SSE3-NEXT: paddb %xmm0, %xmm1 21519; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21520; SSE3-NEXT: pxor %xmm0, %xmm0 21521; SSE3-NEXT: psadbw %xmm1, %xmm0 21522; SSE3-NEXT: por {{.*}}(%rip), %xmm0 21523; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21524; SSE3-NEXT: movdqa %xmm1, %xmm2 21525; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 21526; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21527; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 21528; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21529; SSE3-NEXT: pand %xmm3, %xmm1 21530; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21531; SSE3-NEXT: por %xmm1, %xmm0 21532; SSE3-NEXT: retq 21533; 21534; SSSE3-LABEL: ult_14_v2i64: 21535; SSSE3: # %bb.0: 21536; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21537; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 21538; SSSE3-NEXT: pand %xmm1, %xmm2 21539; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21540; SSSE3-NEXT: movdqa %xmm3, %xmm4 21541; SSSE3-NEXT: pshufb %xmm2, %xmm4 21542; SSSE3-NEXT: psrlw $4, %xmm0 21543; SSSE3-NEXT: pand %xmm1, %xmm0 21544; SSSE3-NEXT: pshufb %xmm0, %xmm3 21545; SSSE3-NEXT: paddb %xmm4, %xmm3 21546; SSSE3-NEXT: pxor %xmm0, %xmm0 21547; SSSE3-NEXT: psadbw %xmm3, %xmm0 21548; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 21549; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21550; SSSE3-NEXT: movdqa %xmm1, %xmm2 21551; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 21552; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21553; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 21554; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21555; SSSE3-NEXT: pand %xmm3, %xmm1 21556; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21557; SSSE3-NEXT: por %xmm1, %xmm0 21558; SSSE3-NEXT: retq 21559; 21560; SSE41-LABEL: ult_14_v2i64: 21561; SSE41: # %bb.0: 21562; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21563; SSE41-NEXT: movdqa %xmm0, %xmm2 21564; SSE41-NEXT: pand %xmm1, %xmm2 21565; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21566; SSE41-NEXT: movdqa %xmm3, %xmm4 21567; SSE41-NEXT: pshufb %xmm2, %xmm4 21568; SSE41-NEXT: psrlw $4, %xmm0 21569; SSE41-NEXT: pand %xmm1, %xmm0 21570; SSE41-NEXT: pshufb %xmm0, %xmm3 21571; SSE41-NEXT: paddb %xmm4, %xmm3 21572; SSE41-NEXT: pxor %xmm0, %xmm0 21573; SSE41-NEXT: psadbw %xmm3, %xmm0 21574; SSE41-NEXT: por {{.*}}(%rip), %xmm0 21575; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21576; SSE41-NEXT: movdqa %xmm1, %xmm2 21577; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 21578; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21579; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 21580; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21581; SSE41-NEXT: pand %xmm3, %xmm1 21582; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21583; 
SSE41-NEXT: por %xmm1, %xmm0 21584; SSE41-NEXT: retq 21585; 21586; AVX1-LABEL: ult_14_v2i64: 21587; AVX1: # %bb.0: 21588; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21589; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 21590; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21591; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21592; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 21593; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 21594; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21595; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21596; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 21597; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21598; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] 21599; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21600; AVX1-NEXT: retq 21601; 21602; AVX2-LABEL: ult_14_v2i64: 21603; AVX2: # %bb.0: 21604; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21605; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 21606; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21607; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21608; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 21609; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 21610; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21611; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21612; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 21613; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21614; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] 21615; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21616; AVX2-NEXT: retq 21617; 21618; AVX512VPOPCNTDQ-LABEL: ult_14_v2i64: 21619; AVX512VPOPCNTDQ: # %bb.0: 21620; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21621; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 21622; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] 21623; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21624; AVX512VPOPCNTDQ-NEXT: vzeroupper 21625; AVX512VPOPCNTDQ-NEXT: retq 21626; 21627; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64: 21628; AVX512VPOPCNTDQVL: # %bb.0: 21629; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 21630; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 21631; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21632; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21633; AVX512VPOPCNTDQVL-NEXT: retq 21634; 21635; BITALG_NOVLX-LABEL: ult_14_v2i64: 21636; BITALG_NOVLX: # %bb.0: 21637; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21638; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 21639; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 21640; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21641; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] 21642; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21643; BITALG_NOVLX-NEXT: vzeroupper 21644; BITALG_NOVLX-NEXT: retq 21645; 21646; BITALG-LABEL: ult_14_v2i64: 21647; BITALG: # %bb.0: 21648; BITALG-NEXT: vpopcntb %xmm0, %xmm0 21649; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 21650; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21651; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 21652; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21653; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21654; BITALG-NEXT: retq 21655 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 21656 %3 = icmp ult <2 x i64> %2, <i64 14, i64 14> 21657 %4 = sext <2 x i1> %3 to <2 x i64> 21658 ret <2 x i64> %4 21659} 21660 21661define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { 21662; SSE2-LABEL: ugt_14_v2i64: 21663; SSE2: # %bb.0: 21664; SSE2-NEXT: movdqa %xmm0, %xmm1 21665; SSE2-NEXT: psrlw $1, %xmm1 21666; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21667; SSE2-NEXT: psubb %xmm1, %xmm0 21668; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21669; SSE2-NEXT: movdqa %xmm0, %xmm2 21670; SSE2-NEXT: pand %xmm1, %xmm2 21671; SSE2-NEXT: psrlw $2, %xmm0 21672; SSE2-NEXT: pand %xmm1, %xmm0 21673; SSE2-NEXT: paddb %xmm2, %xmm0 21674; SSE2-NEXT: movdqa %xmm0, %xmm1 21675; SSE2-NEXT: psrlw $4, %xmm1 21676; SSE2-NEXT: paddb %xmm0, %xmm1 21677; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21678; SSE2-NEXT: pxor %xmm0, 
%xmm0 21679; SSE2-NEXT: psadbw %xmm1, %xmm0 21680; SSE2-NEXT: por {{.*}}(%rip), %xmm0 21681; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21682; SSE2-NEXT: movdqa %xmm0, %xmm2 21683; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 21684; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21685; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 21686; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21687; SSE2-NEXT: pand %xmm3, %xmm1 21688; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21689; SSE2-NEXT: por %xmm1, %xmm0 21690; SSE2-NEXT: retq 21691; 21692; SSE3-LABEL: ugt_14_v2i64: 21693; SSE3: # %bb.0: 21694; SSE3-NEXT: movdqa %xmm0, %xmm1 21695; SSE3-NEXT: psrlw $1, %xmm1 21696; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21697; SSE3-NEXT: psubb %xmm1, %xmm0 21698; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21699; SSE3-NEXT: movdqa %xmm0, %xmm2 21700; SSE3-NEXT: pand %xmm1, %xmm2 21701; SSE3-NEXT: psrlw $2, %xmm0 21702; SSE3-NEXT: pand %xmm1, %xmm0 21703; SSE3-NEXT: paddb %xmm2, %xmm0 21704; SSE3-NEXT: movdqa %xmm0, %xmm1 21705; SSE3-NEXT: psrlw $4, %xmm1 21706; SSE3-NEXT: paddb %xmm0, %xmm1 21707; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21708; SSE3-NEXT: pxor %xmm0, %xmm0 21709; SSE3-NEXT: psadbw %xmm1, %xmm0 21710; SSE3-NEXT: por {{.*}}(%rip), %xmm0 21711; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21712; SSE3-NEXT: movdqa %xmm0, %xmm2 21713; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 21714; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21715; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 21716; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21717; SSE3-NEXT: pand %xmm3, %xmm1 21718; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21719; SSE3-NEXT: por %xmm1, %xmm0 21720; SSE3-NEXT: retq 21721; 21722; SSSE3-LABEL: ugt_14_v2i64: 21723; SSSE3: # %bb.0: 21724; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21725; SSSE3-NEXT: movdqa %xmm0, %xmm2 21726; SSSE3-NEXT: pand %xmm1, %xmm2 21727; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21728; SSSE3-NEXT: movdqa %xmm3, %xmm4 21729; SSSE3-NEXT: pshufb %xmm2, %xmm4 21730; SSSE3-NEXT: psrlw $4, %xmm0 21731; SSSE3-NEXT: pand %xmm1, %xmm0 21732; SSSE3-NEXT: pshufb %xmm0, %xmm3 21733; SSSE3-NEXT: paddb %xmm4, %xmm3 21734; SSSE3-NEXT: pxor %xmm0, %xmm0 21735; SSSE3-NEXT: psadbw %xmm3, %xmm0 21736; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 21737; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21738; SSSE3-NEXT: movdqa %xmm0, %xmm2 21739; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 21740; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21741; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 21742; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21743; SSSE3-NEXT: pand %xmm3, %xmm1 21744; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21745; SSSE3-NEXT: por %xmm1, %xmm0 21746; SSSE3-NEXT: retq 21747; 21748; SSE41-LABEL: ugt_14_v2i64: 21749; SSE41: # %bb.0: 21750; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21751; SSE41-NEXT: movdqa %xmm0, %xmm2 21752; SSE41-NEXT: pand %xmm1, %xmm2 21753; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21754; SSE41-NEXT: movdqa %xmm3, %xmm4 21755; SSE41-NEXT: pshufb %xmm2, %xmm4 21756; SSE41-NEXT: psrlw $4, %xmm0 21757; SSE41-NEXT: pand %xmm1, %xmm0 21758; SSE41-NEXT: pshufb %xmm0, %xmm3 21759; SSE41-NEXT: paddb %xmm4, %xmm3 21760; SSE41-NEXT: pxor %xmm0, %xmm0 21761; SSE41-NEXT: psadbw %xmm3, %xmm0 21762; SSE41-NEXT: por {{.*}}(%rip), %xmm0 21763; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] 21764; SSE41-NEXT: movdqa %xmm0, %xmm2 21765; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 21766; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21767; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 21768; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21769; SSE41-NEXT: pand %xmm3, %xmm1 21770; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21771; SSE41-NEXT: por %xmm1, %xmm0 21772; SSE41-NEXT: retq 21773; 21774; AVX1-LABEL: ugt_14_v2i64: 21775; AVX1: # %bb.0: 
21776; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21777; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 21778; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21779; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21780; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 21781; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 21782; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21783; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21784; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 21785; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21786; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21787; AVX1-NEXT: retq 21788; 21789; AVX2-LABEL: ugt_14_v2i64: 21790; AVX2: # %bb.0: 21791; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21792; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 21793; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21794; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21795; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 21796; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 21797; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21798; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21799; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 21800; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21801; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21802; AVX2-NEXT: retq 21803; 21804; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64: 21805; AVX512VPOPCNTDQ: # %bb.0: 21806; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21807; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 21808; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21809; AVX512VPOPCNTDQ-NEXT: vzeroupper 21810; AVX512VPOPCNTDQ-NEXT: retq 21811; 21812; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64: 21813; AVX512VPOPCNTDQVL: # %bb.0: 21814; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 21815; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 21816; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21817; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21818; AVX512VPOPCNTDQVL-NEXT: retq 21819; 21820; 
BITALG_NOVLX-LABEL: ugt_14_v2i64: 21821; BITALG_NOVLX: # %bb.0: 21822; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21823; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 21824; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 21825; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21826; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 21827; BITALG_NOVLX-NEXT: vzeroupper 21828; BITALG_NOVLX-NEXT: retq 21829; 21830; BITALG-LABEL: ugt_14_v2i64: 21831; BITALG: # %bb.0: 21832; BITALG-NEXT: vpopcntb %xmm0, %xmm0 21833; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 21834; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21835; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 21836; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 21837; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 21838; BITALG-NEXT: retq 21839 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 21840 %3 = icmp ugt <2 x i64> %2, <i64 14, i64 14> 21841 %4 = sext <2 x i1> %3 to <2 x i64> 21842 ret <2 x i64> %4 21843} 21844 21845define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { 21846; SSE2-LABEL: ult_15_v2i64: 21847; SSE2: # %bb.0: 21848; SSE2-NEXT: movdqa %xmm0, %xmm1 21849; SSE2-NEXT: psrlw $1, %xmm1 21850; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21851; SSE2-NEXT: psubb %xmm1, %xmm0 21852; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21853; SSE2-NEXT: movdqa %xmm0, %xmm2 21854; SSE2-NEXT: pand %xmm1, %xmm2 21855; SSE2-NEXT: psrlw $2, %xmm0 21856; SSE2-NEXT: pand %xmm1, %xmm0 21857; SSE2-NEXT: paddb %xmm2, %xmm0 21858; SSE2-NEXT: movdqa %xmm0, %xmm1 21859; SSE2-NEXT: psrlw $4, %xmm1 21860; SSE2-NEXT: paddb %xmm0, %xmm1 21861; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 21862; SSE2-NEXT: pxor %xmm0, %xmm0 21863; SSE2-NEXT: psadbw %xmm1, %xmm0 21864; SSE2-NEXT: por {{.*}}(%rip), %xmm0 21865; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] 21866; SSE2-NEXT: movdqa %xmm1, %xmm2 21867; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 21868; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21869; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 21870; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21871; SSE2-NEXT: pand %xmm3, %xmm1 21872; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21873; SSE2-NEXT: por %xmm1, %xmm0 21874; SSE2-NEXT: retq 21875; 21876; SSE3-LABEL: ult_15_v2i64: 21877; SSE3: # %bb.0: 21878; SSE3-NEXT: movdqa %xmm0, %xmm1 21879; SSE3-NEXT: psrlw $1, %xmm1 21880; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21881; SSE3-NEXT: psubb %xmm1, %xmm0 21882; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21883; SSE3-NEXT: movdqa %xmm0, %xmm2 21884; SSE3-NEXT: pand %xmm1, %xmm2 21885; SSE3-NEXT: psrlw $2, %xmm0 21886; SSE3-NEXT: pand %xmm1, %xmm0 21887; SSE3-NEXT: paddb %xmm2, %xmm0 21888; SSE3-NEXT: movdqa %xmm0, %xmm1 21889; SSE3-NEXT: psrlw $4, %xmm1 21890; SSE3-NEXT: paddb %xmm0, %xmm1 21891; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 21892; SSE3-NEXT: pxor %xmm0, %xmm0 21893; SSE3-NEXT: psadbw %xmm1, %xmm0 21894; SSE3-NEXT: por {{.*}}(%rip), %xmm0 21895; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] 21896; SSE3-NEXT: movdqa %xmm1, %xmm2 21897; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 21898; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21899; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 21900; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21901; SSE3-NEXT: pand %xmm3, %xmm1 21902; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21903; SSE3-NEXT: por %xmm1, %xmm0 21904; SSE3-NEXT: retq 21905; 21906; SSSE3-LABEL: ult_15_v2i64: 21907; SSSE3: # %bb.0: 21908; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21909; SSSE3-NEXT: movdqa %xmm0, %xmm2 21910; SSSE3-NEXT: pand %xmm1, %xmm2 21911; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21912; SSSE3-NEXT: movdqa %xmm3, %xmm4 21913; SSSE3-NEXT: pshufb %xmm2, %xmm4 21914; SSSE3-NEXT: psrlw $4, %xmm0 21915; SSSE3-NEXT: pand %xmm1, %xmm0 21916; SSSE3-NEXT: pshufb %xmm0, %xmm3 21917; SSSE3-NEXT: paddb %xmm4, %xmm3 21918; SSSE3-NEXT: pxor %xmm0, 
%xmm0 21919; SSSE3-NEXT: psadbw %xmm3, %xmm0 21920; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 21921; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] 21922; SSSE3-NEXT: movdqa %xmm1, %xmm2 21923; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 21924; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21925; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 21926; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21927; SSSE3-NEXT: pand %xmm3, %xmm1 21928; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21929; SSSE3-NEXT: por %xmm1, %xmm0 21930; SSSE3-NEXT: retq 21931; 21932; SSE41-LABEL: ult_15_v2i64: 21933; SSE41: # %bb.0: 21934; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21935; SSE41-NEXT: movdqa %xmm0, %xmm2 21936; SSE41-NEXT: pand %xmm1, %xmm2 21937; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21938; SSE41-NEXT: movdqa %xmm3, %xmm4 21939; SSE41-NEXT: pshufb %xmm2, %xmm4 21940; SSE41-NEXT: psrlw $4, %xmm0 21941; SSE41-NEXT: pand %xmm1, %xmm0 21942; SSE41-NEXT: pshufb %xmm0, %xmm3 21943; SSE41-NEXT: paddb %xmm4, %xmm3 21944; SSE41-NEXT: pxor %xmm0, %xmm0 21945; SSE41-NEXT: psadbw %xmm3, %xmm0 21946; SSE41-NEXT: por {{.*}}(%rip), %xmm0 21947; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] 21948; SSE41-NEXT: movdqa %xmm1, %xmm2 21949; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 21950; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 21951; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 21952; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 21953; SSE41-NEXT: pand %xmm3, %xmm1 21954; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 21955; SSE41-NEXT: por %xmm1, %xmm0 21956; SSE41-NEXT: retq 21957; 21958; AVX1-LABEL: ult_15_v2i64: 21959; AVX1: # %bb.0: 21960; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21961; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 21962; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21963; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21964; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 21965; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 21966; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21967; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21968; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 21969; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21970; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] 21971; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21972; AVX1-NEXT: retq 21973; 21974; AVX2-LABEL: ult_15_v2i64: 21975; AVX2: # %bb.0: 21976; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 21977; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 21978; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 21979; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 21980; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 21981; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 21982; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 21983; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 21984; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 21985; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 21986; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] 21987; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21988; AVX2-NEXT: retq 21989; 21990; AVX512VPOPCNTDQ-LABEL: ult_15_v2i64: 21991; AVX512VPOPCNTDQ: # %bb.0: 21992; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21993; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 21994; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] 21995; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 21996; AVX512VPOPCNTDQ-NEXT: vzeroupper 21997; AVX512VPOPCNTDQ-NEXT: retq 21998; 21999; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64: 22000; AVX512VPOPCNTDQVL: # %bb.0: 22001; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 22002; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 22003; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22004; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22005; AVX512VPOPCNTDQVL-NEXT: retq 22006; 22007; BITALG_NOVLX-LABEL: ult_15_v2i64: 22008; BITALG_NOVLX: # %bb.0: 22009; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22010; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 22011; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 22012; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22013; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] 22014; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22015; BITALG_NOVLX-NEXT: vzeroupper 22016; BITALG_NOVLX-NEXT: retq 22017; 22018; BITALG-LABEL: ult_15_v2i64: 22019; BITALG: # %bb.0: 22020; BITALG-NEXT: vpopcntb %xmm0, %xmm0 22021; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 22022; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22023; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 22024; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22025; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22026; BITALG-NEXT: retq 22027 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 22028 %3 = icmp ult <2 x i64> %2, <i64 15, i64 15> 22029 %4 = sext <2 x i1> %3 to <2 x i64> 22030 ret <2 x i64> %4 22031} 22032 22033define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { 22034; SSE2-LABEL: ugt_15_v2i64: 22035; SSE2: # %bb.0: 22036; SSE2-NEXT: movdqa %xmm0, %xmm1 22037; SSE2-NEXT: psrlw $1, %xmm1 22038; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22039; SSE2-NEXT: psubb %xmm1, %xmm0 22040; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22041; SSE2-NEXT: movdqa %xmm0, %xmm2 22042; SSE2-NEXT: pand %xmm1, %xmm2 22043; SSE2-NEXT: psrlw $2, %xmm0 22044; SSE2-NEXT: pand %xmm1, %xmm0 22045; SSE2-NEXT: paddb %xmm2, %xmm0 22046; SSE2-NEXT: movdqa %xmm0, %xmm1 22047; SSE2-NEXT: psrlw $4, %xmm1 22048; SSE2-NEXT: paddb %xmm0, %xmm1 22049; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22050; SSE2-NEXT: pxor %xmm0, %xmm0 22051; SSE2-NEXT: psadbw %xmm1, %xmm0 22052; SSE2-NEXT: por {{.*}}(%rip), %xmm0 22053; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] 22054; SSE2-NEXT: movdqa %xmm0, %xmm2 22055; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 22056; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22057; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 22058; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22059; SSE2-NEXT: pand 
%xmm3, %xmm1 22060; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22061; SSE2-NEXT: por %xmm1, %xmm0 22062; SSE2-NEXT: retq 22063; 22064; SSE3-LABEL: ugt_15_v2i64: 22065; SSE3: # %bb.0: 22066; SSE3-NEXT: movdqa %xmm0, %xmm1 22067; SSE3-NEXT: psrlw $1, %xmm1 22068; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22069; SSE3-NEXT: psubb %xmm1, %xmm0 22070; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22071; SSE3-NEXT: movdqa %xmm0, %xmm2 22072; SSE3-NEXT: pand %xmm1, %xmm2 22073; SSE3-NEXT: psrlw $2, %xmm0 22074; SSE3-NEXT: pand %xmm1, %xmm0 22075; SSE3-NEXT: paddb %xmm2, %xmm0 22076; SSE3-NEXT: movdqa %xmm0, %xmm1 22077; SSE3-NEXT: psrlw $4, %xmm1 22078; SSE3-NEXT: paddb %xmm0, %xmm1 22079; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22080; SSE3-NEXT: pxor %xmm0, %xmm0 22081; SSE3-NEXT: psadbw %xmm1, %xmm0 22082; SSE3-NEXT: por {{.*}}(%rip), %xmm0 22083; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] 22084; SSE3-NEXT: movdqa %xmm0, %xmm2 22085; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 22086; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22087; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 22088; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22089; SSE3-NEXT: pand %xmm3, %xmm1 22090; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22091; SSE3-NEXT: por %xmm1, %xmm0 22092; SSE3-NEXT: retq 22093; 22094; SSSE3-LABEL: ugt_15_v2i64: 22095; SSSE3: # %bb.0: 22096; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22097; SSSE3-NEXT: movdqa %xmm0, %xmm2 22098; SSSE3-NEXT: pand %xmm1, %xmm2 22099; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22100; SSSE3-NEXT: movdqa %xmm3, %xmm4 22101; SSSE3-NEXT: pshufb %xmm2, %xmm4 22102; SSSE3-NEXT: psrlw $4, %xmm0 22103; SSSE3-NEXT: pand %xmm1, %xmm0 22104; SSSE3-NEXT: pshufb %xmm0, %xmm3 22105; SSSE3-NEXT: paddb %xmm4, %xmm3 22106; SSSE3-NEXT: pxor %xmm0, %xmm0 22107; SSSE3-NEXT: psadbw %xmm3, %xmm0 22108; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 22109; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483663,2147483663] 22110; SSSE3-NEXT: movdqa %xmm0, %xmm2 22111; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 22112; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22113; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 22114; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22115; SSSE3-NEXT: pand %xmm3, %xmm1 22116; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22117; SSSE3-NEXT: por %xmm1, %xmm0 22118; SSSE3-NEXT: retq 22119; 22120; SSE41-LABEL: ugt_15_v2i64: 22121; SSE41: # %bb.0: 22122; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22123; SSE41-NEXT: movdqa %xmm0, %xmm2 22124; SSE41-NEXT: pand %xmm1, %xmm2 22125; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22126; SSE41-NEXT: movdqa %xmm3, %xmm4 22127; SSE41-NEXT: pshufb %xmm2, %xmm4 22128; SSE41-NEXT: psrlw $4, %xmm0 22129; SSE41-NEXT: pand %xmm1, %xmm0 22130; SSE41-NEXT: pshufb %xmm0, %xmm3 22131; SSE41-NEXT: paddb %xmm4, %xmm3 22132; SSE41-NEXT: pxor %xmm0, %xmm0 22133; SSE41-NEXT: psadbw %xmm3, %xmm0 22134; SSE41-NEXT: por {{.*}}(%rip), %xmm0 22135; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] 22136; SSE41-NEXT: movdqa %xmm0, %xmm2 22137; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 22138; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22139; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 22140; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22141; SSE41-NEXT: pand %xmm3, %xmm1 22142; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22143; SSE41-NEXT: por %xmm1, %xmm0 22144; SSE41-NEXT: retq 22145; 22146; AVX1-LABEL: ugt_15_v2i64: 22147; AVX1: # %bb.0: 22148; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22149; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 22150; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22151; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22152; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 22153; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 22154; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22155; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 22156; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 22157; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22158; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22159; AVX1-NEXT: retq 22160; 22161; AVX2-LABEL: ugt_15_v2i64: 22162; AVX2: # %bb.0: 22163; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22164; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 22165; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22166; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22167; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 22168; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 22169; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22170; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22171; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 22172; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22173; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22174; AVX2-NEXT: retq 22175; 22176; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64: 22177; AVX512VPOPCNTDQ: # %bb.0: 22178; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22179; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 22180; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22181; AVX512VPOPCNTDQ-NEXT: vzeroupper 22182; AVX512VPOPCNTDQ-NEXT: retq 22183; 22184; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64: 22185; AVX512VPOPCNTDQVL: # %bb.0: 22186; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 22187; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 22188; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22189; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22190; AVX512VPOPCNTDQVL-NEXT: retq 22191; 22192; BITALG_NOVLX-LABEL: ugt_15_v2i64: 22193; BITALG_NOVLX: # %bb.0: 22194; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22195; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 22196; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 22197; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22198; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22199; BITALG_NOVLX-NEXT: vzeroupper 22200; BITALG_NOVLX-NEXT: retq 
22201; 22202; BITALG-LABEL: ugt_15_v2i64: 22203; BITALG: # %bb.0: 22204; BITALG-NEXT: vpopcntb %xmm0, %xmm0 22205; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 22206; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22207; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 22208; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22209; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22210; BITALG-NEXT: retq 22211 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 22212 %3 = icmp ugt <2 x i64> %2, <i64 15, i64 15> 22213 %4 = sext <2 x i1> %3 to <2 x i64> 22214 ret <2 x i64> %4 22215} 22216 22217define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { 22218; SSE2-LABEL: ult_16_v2i64: 22219; SSE2: # %bb.0: 22220; SSE2-NEXT: movdqa %xmm0, %xmm1 22221; SSE2-NEXT: psrlw $1, %xmm1 22222; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22223; SSE2-NEXT: psubb %xmm1, %xmm0 22224; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22225; SSE2-NEXT: movdqa %xmm0, %xmm2 22226; SSE2-NEXT: pand %xmm1, %xmm2 22227; SSE2-NEXT: psrlw $2, %xmm0 22228; SSE2-NEXT: pand %xmm1, %xmm0 22229; SSE2-NEXT: paddb %xmm2, %xmm0 22230; SSE2-NEXT: movdqa %xmm0, %xmm1 22231; SSE2-NEXT: psrlw $4, %xmm1 22232; SSE2-NEXT: paddb %xmm0, %xmm1 22233; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22234; SSE2-NEXT: pxor %xmm0, %xmm0 22235; SSE2-NEXT: psadbw %xmm1, %xmm0 22236; SSE2-NEXT: por {{.*}}(%rip), %xmm0 22237; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22238; SSE2-NEXT: movdqa %xmm1, %xmm2 22239; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 22240; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22241; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 22242; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22243; SSE2-NEXT: pand %xmm3, %xmm1 22244; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22245; SSE2-NEXT: por %xmm1, %xmm0 22246; SSE2-NEXT: retq 22247; 22248; SSE3-LABEL: ult_16_v2i64: 22249; SSE3: # %bb.0: 22250; SSE3-NEXT: movdqa %xmm0, %xmm1 22251; SSE3-NEXT: psrlw $1, %xmm1 22252; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22253; 
SSE3-NEXT: psubb %xmm1, %xmm0 22254; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22255; SSE3-NEXT: movdqa %xmm0, %xmm2 22256; SSE3-NEXT: pand %xmm1, %xmm2 22257; SSE3-NEXT: psrlw $2, %xmm0 22258; SSE3-NEXT: pand %xmm1, %xmm0 22259; SSE3-NEXT: paddb %xmm2, %xmm0 22260; SSE3-NEXT: movdqa %xmm0, %xmm1 22261; SSE3-NEXT: psrlw $4, %xmm1 22262; SSE3-NEXT: paddb %xmm0, %xmm1 22263; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22264; SSE3-NEXT: pxor %xmm0, %xmm0 22265; SSE3-NEXT: psadbw %xmm1, %xmm0 22266; SSE3-NEXT: por {{.*}}(%rip), %xmm0 22267; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22268; SSE3-NEXT: movdqa %xmm1, %xmm2 22269; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 22270; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22271; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 22272; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22273; SSE3-NEXT: pand %xmm3, %xmm1 22274; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22275; SSE3-NEXT: por %xmm1, %xmm0 22276; SSE3-NEXT: retq 22277; 22278; SSSE3-LABEL: ult_16_v2i64: 22279; SSSE3: # %bb.0: 22280; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22281; SSSE3-NEXT: movdqa %xmm0, %xmm2 22282; SSSE3-NEXT: pand %xmm1, %xmm2 22283; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22284; SSSE3-NEXT: movdqa %xmm3, %xmm4 22285; SSSE3-NEXT: pshufb %xmm2, %xmm4 22286; SSSE3-NEXT: psrlw $4, %xmm0 22287; SSSE3-NEXT: pand %xmm1, %xmm0 22288; SSSE3-NEXT: pshufb %xmm0, %xmm3 22289; SSSE3-NEXT: paddb %xmm4, %xmm3 22290; SSSE3-NEXT: pxor %xmm0, %xmm0 22291; SSSE3-NEXT: psadbw %xmm3, %xmm0 22292; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 22293; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22294; SSSE3-NEXT: movdqa %xmm1, %xmm2 22295; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 22296; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22297; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 22298; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22299; SSSE3-NEXT: pand %xmm3, %xmm1 
22300; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22301; SSSE3-NEXT: por %xmm1, %xmm0 22302; SSSE3-NEXT: retq 22303; 22304; SSE41-LABEL: ult_16_v2i64: 22305; SSE41: # %bb.0: 22306; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22307; SSE41-NEXT: movdqa %xmm0, %xmm2 22308; SSE41-NEXT: pand %xmm1, %xmm2 22309; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22310; SSE41-NEXT: movdqa %xmm3, %xmm4 22311; SSE41-NEXT: pshufb %xmm2, %xmm4 22312; SSE41-NEXT: psrlw $4, %xmm0 22313; SSE41-NEXT: pand %xmm1, %xmm0 22314; SSE41-NEXT: pshufb %xmm0, %xmm3 22315; SSE41-NEXT: paddb %xmm4, %xmm3 22316; SSE41-NEXT: pxor %xmm0, %xmm0 22317; SSE41-NEXT: psadbw %xmm3, %xmm0 22318; SSE41-NEXT: por {{.*}}(%rip), %xmm0 22319; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22320; SSE41-NEXT: movdqa %xmm1, %xmm2 22321; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 22322; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22323; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 22324; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22325; SSE41-NEXT: pand %xmm3, %xmm1 22326; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22327; SSE41-NEXT: por %xmm1, %xmm0 22328; SSE41-NEXT: retq 22329; 22330; AVX1-LABEL: ult_16_v2i64: 22331; AVX1: # %bb.0: 22332; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22333; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 22334; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22335; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22336; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 22337; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 22338; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22339; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22340; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 22341; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22342; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] 22343; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22344; AVX1-NEXT: retq 22345; 22346; AVX2-LABEL: ult_16_v2i64: 22347; AVX2: # %bb.0: 22348; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22349; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 22350; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22351; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22352; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 22353; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 22354; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22355; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22356; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 22357; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22358; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] 22359; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22360; AVX2-NEXT: retq 22361; 22362; AVX512VPOPCNTDQ-LABEL: ult_16_v2i64: 22363; AVX512VPOPCNTDQ: # %bb.0: 22364; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22365; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 22366; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] 22367; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22368; AVX512VPOPCNTDQ-NEXT: vzeroupper 22369; AVX512VPOPCNTDQ-NEXT: retq 22370; 22371; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64: 22372; AVX512VPOPCNTDQVL: # %bb.0: 22373; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 22374; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 22375; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22376; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22377; AVX512VPOPCNTDQVL-NEXT: retq 22378; 22379; BITALG_NOVLX-LABEL: ult_16_v2i64: 22380; BITALG_NOVLX: # %bb.0: 22381; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22382; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 22383; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 22384; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22385; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] 22386; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22387; BITALG_NOVLX-NEXT: vzeroupper 22388; BITALG_NOVLX-NEXT: retq 22389; 22390; BITALG-LABEL: ult_16_v2i64: 22391; BITALG: # %bb.0: 22392; BITALG-NEXT: vpopcntb %xmm0, %xmm0 22393; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 22394; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22395; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 22396; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22397; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22398; BITALG-NEXT: retq 22399 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 22400 %3 = icmp ult <2 x i64> %2, <i64 16, i64 16> 22401 %4 = sext <2 x i1> %3 to <2 x i64> 22402 ret <2 x i64> %4 22403} 22404 22405define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { 22406; SSE2-LABEL: ugt_16_v2i64: 22407; SSE2: # %bb.0: 22408; SSE2-NEXT: movdqa %xmm0, %xmm1 22409; SSE2-NEXT: psrlw $1, %xmm1 22410; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22411; SSE2-NEXT: psubb %xmm1, %xmm0 22412; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22413; SSE2-NEXT: movdqa %xmm0, %xmm2 22414; SSE2-NEXT: pand %xmm1, %xmm2 22415; SSE2-NEXT: psrlw $2, %xmm0 22416; SSE2-NEXT: pand %xmm1, %xmm0 22417; SSE2-NEXT: paddb %xmm2, %xmm0 22418; SSE2-NEXT: movdqa %xmm0, %xmm1 22419; SSE2-NEXT: psrlw $4, %xmm1 22420; SSE2-NEXT: paddb %xmm0, %xmm1 22421; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22422; SSE2-NEXT: pxor %xmm0, %xmm0 22423; SSE2-NEXT: psadbw %xmm1, %xmm0 22424; SSE2-NEXT: por {{.*}}(%rip), %xmm0 22425; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22426; SSE2-NEXT: movdqa %xmm0, %xmm2 22427; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 22428; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22429; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 22430; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22431; SSE2-NEXT: pand %xmm3, %xmm1 22432; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22433; SSE2-NEXT: por %xmm1, %xmm0 22434; SSE2-NEXT: retq 22435; 22436; SSE3-LABEL: ugt_16_v2i64: 22437; SSE3: # %bb.0: 22438; SSE3-NEXT: movdqa %xmm0, %xmm1 22439; SSE3-NEXT: psrlw $1, %xmm1 22440; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22441; SSE3-NEXT: psubb %xmm1, %xmm0 22442; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22443; SSE3-NEXT: movdqa %xmm0, %xmm2 22444; SSE3-NEXT: pand %xmm1, %xmm2 22445; SSE3-NEXT: psrlw $2, %xmm0 22446; SSE3-NEXT: pand %xmm1, %xmm0 22447; SSE3-NEXT: paddb %xmm2, %xmm0 22448; SSE3-NEXT: movdqa %xmm0, %xmm1 22449; SSE3-NEXT: psrlw $4, %xmm1 22450; SSE3-NEXT: paddb %xmm0, %xmm1 22451; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22452; SSE3-NEXT: pxor %xmm0, %xmm0 22453; SSE3-NEXT: psadbw %xmm1, %xmm0 22454; SSE3-NEXT: por {{.*}}(%rip), %xmm0 22455; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22456; SSE3-NEXT: movdqa %xmm0, %xmm2 22457; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 22458; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22459; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 22460; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22461; SSE3-NEXT: pand %xmm3, %xmm1 22462; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22463; SSE3-NEXT: por %xmm1, %xmm0 22464; SSE3-NEXT: retq 22465; 22466; SSSE3-LABEL: ugt_16_v2i64: 22467; SSSE3: # %bb.0: 22468; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22469; SSSE3-NEXT: movdqa %xmm0, %xmm2 22470; SSSE3-NEXT: pand %xmm1, %xmm2 22471; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22472; SSSE3-NEXT: movdqa %xmm3, %xmm4 22473; SSSE3-NEXT: pshufb %xmm2, %xmm4 22474; SSSE3-NEXT: psrlw $4, %xmm0 22475; SSSE3-NEXT: pand %xmm1, %xmm0 22476; SSSE3-NEXT: pshufb %xmm0, %xmm3 22477; SSSE3-NEXT: paddb %xmm4, %xmm3 22478; SSSE3-NEXT: pxor %xmm0, %xmm0 22479; SSSE3-NEXT: psadbw %xmm3, %xmm0 22480; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 22481; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22482; SSSE3-NEXT: movdqa %xmm0, %xmm2 22483; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 22484; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22485; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 22486; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22487; SSSE3-NEXT: pand %xmm3, %xmm1 22488; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22489; 
SSSE3-NEXT: por %xmm1, %xmm0 22490; SSSE3-NEXT: retq 22491; 22492; SSE41-LABEL: ugt_16_v2i64: 22493; SSE41: # %bb.0: 22494; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22495; SSE41-NEXT: movdqa %xmm0, %xmm2 22496; SSE41-NEXT: pand %xmm1, %xmm2 22497; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22498; SSE41-NEXT: movdqa %xmm3, %xmm4 22499; SSE41-NEXT: pshufb %xmm2, %xmm4 22500; SSE41-NEXT: psrlw $4, %xmm0 22501; SSE41-NEXT: pand %xmm1, %xmm0 22502; SSE41-NEXT: pshufb %xmm0, %xmm3 22503; SSE41-NEXT: paddb %xmm4, %xmm3 22504; SSE41-NEXT: pxor %xmm0, %xmm0 22505; SSE41-NEXT: psadbw %xmm3, %xmm0 22506; SSE41-NEXT: por {{.*}}(%rip), %xmm0 22507; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] 22508; SSE41-NEXT: movdqa %xmm0, %xmm2 22509; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 22510; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22511; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 22512; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22513; SSE41-NEXT: pand %xmm3, %xmm1 22514; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22515; SSE41-NEXT: por %xmm1, %xmm0 22516; SSE41-NEXT: retq 22517; 22518; AVX1-LABEL: ugt_16_v2i64: 22519; AVX1: # %bb.0: 22520; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22521; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 22522; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22523; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22524; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 22525; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 22526; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22527; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22528; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 22529; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22530; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22531; AVX1-NEXT: retq 22532; 22533; AVX2-LABEL: ugt_16_v2i64: 22534; AVX2: # %bb.0: 22535; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22536; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 22537; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22538; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22539; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 22540; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 22541; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22542; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22543; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 22544; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22545; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22546; AVX2-NEXT: retq 22547; 22548; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64: 22549; AVX512VPOPCNTDQ: # %bb.0: 22550; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22551; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 22552; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22553; AVX512VPOPCNTDQ-NEXT: vzeroupper 22554; AVX512VPOPCNTDQ-NEXT: retq 22555; 22556; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64: 22557; AVX512VPOPCNTDQVL: # %bb.0: 22558; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 22559; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 22560; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22561; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22562; AVX512VPOPCNTDQVL-NEXT: retq 22563; 22564; BITALG_NOVLX-LABEL: ugt_16_v2i64: 22565; BITALG_NOVLX: # %bb.0: 22566; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22567; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 22568; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 22569; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22570; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22571; BITALG_NOVLX-NEXT: vzeroupper 22572; BITALG_NOVLX-NEXT: retq 22573; 22574; BITALG-LABEL: ugt_16_v2i64: 22575; BITALG: # %bb.0: 22576; BITALG-NEXT: vpopcntb %xmm0, %xmm0 22577; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 22578; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22579; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 22580; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22581; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22582; 
BITALG-NEXT: retq 22583 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 22584 %3 = icmp ugt <2 x i64> %2, <i64 16, i64 16> 22585 %4 = sext <2 x i1> %3 to <2 x i64> 22586 ret <2 x i64> %4 22587} 22588 22589define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { 22590; SSE2-LABEL: ult_17_v2i64: 22591; SSE2: # %bb.0: 22592; SSE2-NEXT: movdqa %xmm0, %xmm1 22593; SSE2-NEXT: psrlw $1, %xmm1 22594; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22595; SSE2-NEXT: psubb %xmm1, %xmm0 22596; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22597; SSE2-NEXT: movdqa %xmm0, %xmm2 22598; SSE2-NEXT: pand %xmm1, %xmm2 22599; SSE2-NEXT: psrlw $2, %xmm0 22600; SSE2-NEXT: pand %xmm1, %xmm0 22601; SSE2-NEXT: paddb %xmm2, %xmm0 22602; SSE2-NEXT: movdqa %xmm0, %xmm1 22603; SSE2-NEXT: psrlw $4, %xmm1 22604; SSE2-NEXT: paddb %xmm0, %xmm1 22605; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22606; SSE2-NEXT: pxor %xmm0, %xmm0 22607; SSE2-NEXT: psadbw %xmm1, %xmm0 22608; SSE2-NEXT: por {{.*}}(%rip), %xmm0 22609; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22610; SSE2-NEXT: movdqa %xmm1, %xmm2 22611; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 22612; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22613; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 22614; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22615; SSE2-NEXT: pand %xmm3, %xmm1 22616; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22617; SSE2-NEXT: por %xmm1, %xmm0 22618; SSE2-NEXT: retq 22619; 22620; SSE3-LABEL: ult_17_v2i64: 22621; SSE3: # %bb.0: 22622; SSE3-NEXT: movdqa %xmm0, %xmm1 22623; SSE3-NEXT: psrlw $1, %xmm1 22624; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22625; SSE3-NEXT: psubb %xmm1, %xmm0 22626; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22627; SSE3-NEXT: movdqa %xmm0, %xmm2 22628; SSE3-NEXT: pand %xmm1, %xmm2 22629; SSE3-NEXT: psrlw $2, %xmm0 22630; SSE3-NEXT: pand %xmm1, %xmm0 22631; SSE3-NEXT: paddb %xmm2, %xmm0 22632; SSE3-NEXT: movdqa %xmm0, %xmm1 22633; SSE3-NEXT: psrlw $4, 
%xmm1 22634; SSE3-NEXT: paddb %xmm0, %xmm1 22635; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22636; SSE3-NEXT: pxor %xmm0, %xmm0 22637; SSE3-NEXT: psadbw %xmm1, %xmm0 22638; SSE3-NEXT: por {{.*}}(%rip), %xmm0 22639; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22640; SSE3-NEXT: movdqa %xmm1, %xmm2 22641; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 22642; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22643; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 22644; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22645; SSE3-NEXT: pand %xmm3, %xmm1 22646; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22647; SSE3-NEXT: por %xmm1, %xmm0 22648; SSE3-NEXT: retq 22649; 22650; SSSE3-LABEL: ult_17_v2i64: 22651; SSSE3: # %bb.0: 22652; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22653; SSSE3-NEXT: movdqa %xmm0, %xmm2 22654; SSSE3-NEXT: pand %xmm1, %xmm2 22655; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22656; SSSE3-NEXT: movdqa %xmm3, %xmm4 22657; SSSE3-NEXT: pshufb %xmm2, %xmm4 22658; SSSE3-NEXT: psrlw $4, %xmm0 22659; SSSE3-NEXT: pand %xmm1, %xmm0 22660; SSSE3-NEXT: pshufb %xmm0, %xmm3 22661; SSSE3-NEXT: paddb %xmm4, %xmm3 22662; SSSE3-NEXT: pxor %xmm0, %xmm0 22663; SSSE3-NEXT: psadbw %xmm3, %xmm0 22664; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 22665; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22666; SSSE3-NEXT: movdqa %xmm1, %xmm2 22667; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 22668; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22669; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 22670; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22671; SSSE3-NEXT: pand %xmm3, %xmm1 22672; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22673; SSSE3-NEXT: por %xmm1, %xmm0 22674; SSSE3-NEXT: retq 22675; 22676; SSE41-LABEL: ult_17_v2i64: 22677; SSE41: # %bb.0: 22678; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22679; SSE41-NEXT: movdqa %xmm0, %xmm2 22680; SSE41-NEXT: pand %xmm1, %xmm2 22681; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22682; SSE41-NEXT: movdqa %xmm3, %xmm4 22683; SSE41-NEXT: pshufb %xmm2, %xmm4 22684; SSE41-NEXT: psrlw $4, %xmm0 22685; SSE41-NEXT: pand %xmm1, %xmm0 22686; SSE41-NEXT: pshufb %xmm0, %xmm3 22687; SSE41-NEXT: paddb %xmm4, %xmm3 22688; SSE41-NEXT: pxor %xmm0, %xmm0 22689; SSE41-NEXT: psadbw %xmm3, %xmm0 22690; SSE41-NEXT: por {{.*}}(%rip), %xmm0 22691; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22692; SSE41-NEXT: movdqa %xmm1, %xmm2 22693; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 22694; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22695; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 22696; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22697; SSE41-NEXT: pand %xmm3, %xmm1 22698; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22699; SSE41-NEXT: por %xmm1, %xmm0 22700; SSE41-NEXT: retq 22701; 22702; AVX1-LABEL: ult_17_v2i64: 22703; AVX1: # %bb.0: 22704; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22705; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 22706; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22707; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22708; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 22709; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 22710; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22711; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22712; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 22713; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22714; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] 22715; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22716; AVX1-NEXT: retq 22717; 22718; AVX2-LABEL: ult_17_v2i64: 22719; AVX2: # %bb.0: 22720; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22721; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 22722; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22723; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22724; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 22725; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 22726; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 22727; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22728; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 22729; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22730; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] 22731; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22732; AVX2-NEXT: retq 22733; 22734; AVX512VPOPCNTDQ-LABEL: ult_17_v2i64: 22735; AVX512VPOPCNTDQ: # %bb.0: 22736; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22737; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 22738; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] 22739; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22740; AVX512VPOPCNTDQ-NEXT: vzeroupper 22741; AVX512VPOPCNTDQ-NEXT: retq 22742; 22743; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64: 22744; AVX512VPOPCNTDQVL: # %bb.0: 22745; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 22746; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 22747; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22748; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22749; AVX512VPOPCNTDQVL-NEXT: retq 22750; 22751; BITALG_NOVLX-LABEL: ult_17_v2i64: 22752; BITALG_NOVLX: # %bb.0: 22753; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22754; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 22755; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 22756; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22757; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] 22758; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 22759; BITALG_NOVLX-NEXT: vzeroupper 22760; BITALG_NOVLX-NEXT: retq 22761; 22762; BITALG-LABEL: ult_17_v2i64: 22763; BITALG: # %bb.0: 22764; BITALG-NEXT: vpopcntb %xmm0, %xmm0 22765; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 22766; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22767; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 22768; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22769; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22770; BITALG-NEXT: retq 22771 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 22772 %3 = icmp ult <2 x i64> %2, <i64 
17, i64 17> 22773 %4 = sext <2 x i1> %3 to <2 x i64> 22774 ret <2 x i64> %4 22775} 22776 22777define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { 22778; SSE2-LABEL: ugt_17_v2i64: 22779; SSE2: # %bb.0: 22780; SSE2-NEXT: movdqa %xmm0, %xmm1 22781; SSE2-NEXT: psrlw $1, %xmm1 22782; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22783; SSE2-NEXT: psubb %xmm1, %xmm0 22784; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22785; SSE2-NEXT: movdqa %xmm0, %xmm2 22786; SSE2-NEXT: pand %xmm1, %xmm2 22787; SSE2-NEXT: psrlw $2, %xmm0 22788; SSE2-NEXT: pand %xmm1, %xmm0 22789; SSE2-NEXT: paddb %xmm2, %xmm0 22790; SSE2-NEXT: movdqa %xmm0, %xmm1 22791; SSE2-NEXT: psrlw $4, %xmm1 22792; SSE2-NEXT: paddb %xmm0, %xmm1 22793; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22794; SSE2-NEXT: pxor %xmm0, %xmm0 22795; SSE2-NEXT: psadbw %xmm1, %xmm0 22796; SSE2-NEXT: por {{.*}}(%rip), %xmm0 22797; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22798; SSE2-NEXT: movdqa %xmm0, %xmm2 22799; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 22800; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22801; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 22802; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22803; SSE2-NEXT: pand %xmm3, %xmm1 22804; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22805; SSE2-NEXT: por %xmm1, %xmm0 22806; SSE2-NEXT: retq 22807; 22808; SSE3-LABEL: ugt_17_v2i64: 22809; SSE3: # %bb.0: 22810; SSE3-NEXT: movdqa %xmm0, %xmm1 22811; SSE3-NEXT: psrlw $1, %xmm1 22812; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22813; SSE3-NEXT: psubb %xmm1, %xmm0 22814; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22815; SSE3-NEXT: movdqa %xmm0, %xmm2 22816; SSE3-NEXT: pand %xmm1, %xmm2 22817; SSE3-NEXT: psrlw $2, %xmm0 22818; SSE3-NEXT: pand %xmm1, %xmm0 22819; SSE3-NEXT: paddb %xmm2, %xmm0 22820; SSE3-NEXT: movdqa %xmm0, %xmm1 22821; SSE3-NEXT: psrlw $4, %xmm1 22822; SSE3-NEXT: paddb %xmm0, %xmm1 22823; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22824; SSE3-NEXT: pxor %xmm0, 
%xmm0 22825; SSE3-NEXT: psadbw %xmm1, %xmm0 22826; SSE3-NEXT: por {{.*}}(%rip), %xmm0 22827; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22828; SSE3-NEXT: movdqa %xmm0, %xmm2 22829; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 22830; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22831; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 22832; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22833; SSE3-NEXT: pand %xmm3, %xmm1 22834; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22835; SSE3-NEXT: por %xmm1, %xmm0 22836; SSE3-NEXT: retq 22837; 22838; SSSE3-LABEL: ugt_17_v2i64: 22839; SSSE3: # %bb.0: 22840; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22841; SSSE3-NEXT: movdqa %xmm0, %xmm2 22842; SSSE3-NEXT: pand %xmm1, %xmm2 22843; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22844; SSSE3-NEXT: movdqa %xmm3, %xmm4 22845; SSSE3-NEXT: pshufb %xmm2, %xmm4 22846; SSSE3-NEXT: psrlw $4, %xmm0 22847; SSSE3-NEXT: pand %xmm1, %xmm0 22848; SSSE3-NEXT: pshufb %xmm0, %xmm3 22849; SSSE3-NEXT: paddb %xmm4, %xmm3 22850; SSSE3-NEXT: pxor %xmm0, %xmm0 22851; SSSE3-NEXT: psadbw %xmm3, %xmm0 22852; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 22853; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22854; SSSE3-NEXT: movdqa %xmm0, %xmm2 22855; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 22856; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22857; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 22858; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22859; SSSE3-NEXT: pand %xmm3, %xmm1 22860; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22861; SSSE3-NEXT: por %xmm1, %xmm0 22862; SSSE3-NEXT: retq 22863; 22864; SSE41-LABEL: ugt_17_v2i64: 22865; SSE41: # %bb.0: 22866; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22867; SSE41-NEXT: movdqa %xmm0, %xmm2 22868; SSE41-NEXT: pand %xmm1, %xmm2 22869; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22870; SSE41-NEXT: movdqa %xmm3, %xmm4 22871; SSE41-NEXT: 
pshufb %xmm2, %xmm4 22872; SSE41-NEXT: psrlw $4, %xmm0 22873; SSE41-NEXT: pand %xmm1, %xmm0 22874; SSE41-NEXT: pshufb %xmm0, %xmm3 22875; SSE41-NEXT: paddb %xmm4, %xmm3 22876; SSE41-NEXT: pxor %xmm0, %xmm0 22877; SSE41-NEXT: psadbw %xmm3, %xmm0 22878; SSE41-NEXT: por {{.*}}(%rip), %xmm0 22879; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] 22880; SSE41-NEXT: movdqa %xmm0, %xmm2 22881; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 22882; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22883; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 22884; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22885; SSE41-NEXT: pand %xmm3, %xmm1 22886; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22887; SSE41-NEXT: por %xmm1, %xmm0 22888; SSE41-NEXT: retq 22889; 22890; AVX1-LABEL: ugt_17_v2i64: 22891; AVX1: # %bb.0: 22892; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22893; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 22894; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22895; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22896; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 22897; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 22898; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22899; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22900; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 22901; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22902; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22903; AVX1-NEXT: retq 22904; 22905; AVX2-LABEL: ugt_17_v2i64: 22906; AVX2: # %bb.0: 22907; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 22908; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 22909; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 22910; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 22911; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 22912; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 22913; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 22914; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 22915; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 22916; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22917; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22918; AVX2-NEXT: retq 22919; 22920; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64: 22921; AVX512VPOPCNTDQ: # %bb.0: 22922; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22923; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 22924; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22925; AVX512VPOPCNTDQ-NEXT: vzeroupper 22926; AVX512VPOPCNTDQ-NEXT: retq 22927; 22928; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64: 22929; AVX512VPOPCNTDQVL: # %bb.0: 22930; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 22931; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 22932; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22933; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22934; AVX512VPOPCNTDQVL-NEXT: retq 22935; 22936; BITALG_NOVLX-LABEL: ugt_17_v2i64: 22937; BITALG_NOVLX: # %bb.0: 22938; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22939; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 22940; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 22941; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22942; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 22943; BITALG_NOVLX-NEXT: vzeroupper 22944; BITALG_NOVLX-NEXT: retq 22945; 22946; BITALG-LABEL: ugt_17_v2i64: 22947; BITALG: # %bb.0: 22948; BITALG-NEXT: vpopcntb %xmm0, %xmm0 22949; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 22950; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 22951; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 22952; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 22953; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 22954; BITALG-NEXT: retq 22955 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 22956 %3 = icmp ugt <2 x i64> %2, <i64 17, i64 17> 22957 %4 = sext <2 x i1> %3 to <2 x i64> 22958 ret <2 x i64> %4 22959} 22960 22961define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { 22962; SSE2-LABEL: ult_18_v2i64: 22963; SSE2: # %bb.0: 22964; SSE2-NEXT: movdqa %xmm0, %xmm1 22965; SSE2-NEXT: psrlw $1, %xmm1 22966; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 22967; SSE2-NEXT: psubb %xmm1, %xmm0 22968; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22969; SSE2-NEXT: movdqa %xmm0, %xmm2 22970; SSE2-NEXT: pand %xmm1, %xmm2 22971; SSE2-NEXT: psrlw $2, %xmm0 22972; SSE2-NEXT: pand %xmm1, %xmm0 22973; SSE2-NEXT: paddb %xmm2, %xmm0 22974; SSE2-NEXT: movdqa %xmm0, %xmm1 22975; SSE2-NEXT: psrlw $4, %xmm1 22976; SSE2-NEXT: paddb %xmm0, %xmm1 22977; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 22978; SSE2-NEXT: pxor %xmm0, %xmm0 22979; SSE2-NEXT: psadbw %xmm1, %xmm0 22980; SSE2-NEXT: por {{.*}}(%rip), %xmm0 22981; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 22982; SSE2-NEXT: movdqa %xmm1, %xmm2 22983; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 22984; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 22985; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 22986; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 22987; SSE2-NEXT: pand %xmm3, %xmm1 22988; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 22989; SSE2-NEXT: por %xmm1, %xmm0 22990; SSE2-NEXT: retq 22991; 22992; SSE3-LABEL: ult_18_v2i64: 22993; SSE3: # %bb.0: 22994; SSE3-NEXT: movdqa %xmm0, %xmm1 22995; SSE3-NEXT: psrlw $1, %xmm1 22996; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 22997; SSE3-NEXT: psubb %xmm1, %xmm0 22998; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 22999; SSE3-NEXT: movdqa %xmm0, %xmm2 23000; SSE3-NEXT: pand %xmm1, %xmm2 23001; SSE3-NEXT: psrlw $2, %xmm0 23002; SSE3-NEXT: pand %xmm1, %xmm0 23003; SSE3-NEXT: paddb %xmm2, %xmm0 23004; SSE3-NEXT: movdqa %xmm0, %xmm1 23005; SSE3-NEXT: psrlw $4, %xmm1 23006; SSE3-NEXT: paddb %xmm0, %xmm1 23007; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23008; SSE3-NEXT: pxor %xmm0, %xmm0 23009; SSE3-NEXT: psadbw %xmm1, %xmm0 23010; SSE3-NEXT: por {{.*}}(%rip), %xmm0 23011; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 23012; SSE3-NEXT: movdqa %xmm1, %xmm2 23013; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 23014; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23015; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 23016; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23017; SSE3-NEXT: pand %xmm3, %xmm1 23018; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23019; SSE3-NEXT: por %xmm1, %xmm0 23020; SSE3-NEXT: retq 23021; 23022; SSSE3-LABEL: ult_18_v2i64: 23023; SSSE3: # %bb.0: 23024; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23025; SSSE3-NEXT: movdqa %xmm0, %xmm2 23026; SSSE3-NEXT: pand %xmm1, %xmm2 23027; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23028; SSSE3-NEXT: movdqa %xmm3, %xmm4 23029; SSSE3-NEXT: pshufb %xmm2, %xmm4 23030; SSSE3-NEXT: psrlw $4, %xmm0 23031; SSSE3-NEXT: pand %xmm1, %xmm0 23032; SSSE3-NEXT: pshufb %xmm0, %xmm3 23033; SSSE3-NEXT: paddb %xmm4, %xmm3 23034; SSSE3-NEXT: pxor %xmm0, %xmm0 23035; SSSE3-NEXT: psadbw %xmm3, %xmm0 23036; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 23037; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 23038; SSSE3-NEXT: movdqa %xmm1, %xmm2 23039; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 23040; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23041; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 23042; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23043; SSSE3-NEXT: pand %xmm3, %xmm1 23044; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23045; SSSE3-NEXT: por %xmm1, %xmm0 23046; SSSE3-NEXT: retq 23047; 23048; SSE41-LABEL: ult_18_v2i64: 23049; SSE41: # %bb.0: 23050; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23051; SSE41-NEXT: movdqa %xmm0, %xmm2 23052; SSE41-NEXT: pand %xmm1, %xmm2 23053; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23054; SSE41-NEXT: movdqa %xmm3, %xmm4 23055; SSE41-NEXT: pshufb %xmm2, %xmm4 23056; SSE41-NEXT: psrlw $4, %xmm0 23057; SSE41-NEXT: pand %xmm1, %xmm0 23058; SSE41-NEXT: pshufb %xmm0, %xmm3 23059; SSE41-NEXT: paddb %xmm4, %xmm3 23060; SSE41-NEXT: pxor %xmm0, %xmm0 23061; SSE41-NEXT: psadbw %xmm3, %xmm0 23062; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
23063; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 23064; SSE41-NEXT: movdqa %xmm1, %xmm2 23065; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 23066; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23067; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 23068; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23069; SSE41-NEXT: pand %xmm3, %xmm1 23070; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23071; SSE41-NEXT: por %xmm1, %xmm0 23072; SSE41-NEXT: retq 23073; 23074; AVX1-LABEL: ult_18_v2i64: 23075; AVX1: # %bb.0: 23076; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23077; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 23078; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23079; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23080; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 23081; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 23082; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23083; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23084; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 23085; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23086; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] 23087; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23088; AVX1-NEXT: retq 23089; 23090; AVX2-LABEL: ult_18_v2i64: 23091; AVX2: # %bb.0: 23092; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23093; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 23094; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23095; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23096; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 23097; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 23098; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23099; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23100; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 23101; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23102; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] 23103; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23104; AVX2-NEXT: retq 23105; 23106; AVX512VPOPCNTDQ-LABEL: ult_18_v2i64: 23107; AVX512VPOPCNTDQ: # %bb.0: 23108; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 23109; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 23110; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] 23111; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23112; AVX512VPOPCNTDQ-NEXT: vzeroupper 23113; AVX512VPOPCNTDQ-NEXT: retq 23114; 23115; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64: 23116; AVX512VPOPCNTDQVL: # %bb.0: 23117; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 23118; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 23119; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23120; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23121; AVX512VPOPCNTDQVL-NEXT: retq 23122; 23123; BITALG_NOVLX-LABEL: ult_18_v2i64: 23124; BITALG_NOVLX: # %bb.0: 23125; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23126; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 23127; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 23128; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23129; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] 23130; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23131; BITALG_NOVLX-NEXT: vzeroupper 23132; BITALG_NOVLX-NEXT: retq 23133; 23134; BITALG-LABEL: ult_18_v2i64: 23135; BITALG: # %bb.0: 23136; BITALG-NEXT: vpopcntb %xmm0, %xmm0 23137; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 23138; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23139; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 23140; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23141; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23142; BITALG-NEXT: retq 23143 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 23144 %3 = icmp ult <2 x i64> %2, <i64 18, i64 18> 23145 %4 = sext <2 x i1> %3 to <2 x i64> 23146 ret <2 x i64> %4 23147} 23148 23149define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { 23150; SSE2-LABEL: ugt_18_v2i64: 23151; SSE2: # %bb.0: 23152; SSE2-NEXT: movdqa %xmm0, %xmm1 23153; SSE2-NEXT: psrlw $1, %xmm1 23154; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23155; SSE2-NEXT: psubb %xmm1, %xmm0 23156; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23157; SSE2-NEXT: movdqa %xmm0, %xmm2 23158; SSE2-NEXT: pand %xmm1, %xmm2 23159; SSE2-NEXT: psrlw $2, %xmm0 23160; SSE2-NEXT: pand %xmm1, %xmm0 23161; SSE2-NEXT: paddb %xmm2, %xmm0 23162; SSE2-NEXT: movdqa %xmm0, %xmm1 23163; SSE2-NEXT: psrlw $4, %xmm1 23164; SSE2-NEXT: paddb %xmm0, %xmm1 23165; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23166; SSE2-NEXT: pxor %xmm0, %xmm0 23167; SSE2-NEXT: psadbw %xmm1, %xmm0 23168; SSE2-NEXT: por {{.*}}(%rip), %xmm0 23169; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 23170; SSE2-NEXT: movdqa %xmm0, %xmm2 23171; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 23172; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23173; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 23174; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23175; SSE2-NEXT: pand %xmm3, %xmm1 23176; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23177; SSE2-NEXT: por %xmm1, %xmm0 23178; SSE2-NEXT: retq 23179; 23180; SSE3-LABEL: ugt_18_v2i64: 23181; SSE3: # %bb.0: 23182; SSE3-NEXT: movdqa %xmm0, %xmm1 23183; SSE3-NEXT: psrlw $1, %xmm1 23184; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23185; SSE3-NEXT: psubb %xmm1, %xmm0 23186; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23187; SSE3-NEXT: movdqa %xmm0, %xmm2 23188; SSE3-NEXT: pand %xmm1, %xmm2 23189; SSE3-NEXT: psrlw $2, %xmm0 23190; SSE3-NEXT: pand %xmm1, %xmm0 23191; SSE3-NEXT: paddb %xmm2, %xmm0 23192; SSE3-NEXT: movdqa %xmm0, %xmm1 23193; SSE3-NEXT: psrlw $4, %xmm1 23194; SSE3-NEXT: paddb %xmm0, %xmm1 23195; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23196; SSE3-NEXT: pxor %xmm0, %xmm0 23197; SSE3-NEXT: psadbw %xmm1, %xmm0 23198; SSE3-NEXT: por {{.*}}(%rip), %xmm0 23199; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 23200; SSE3-NEXT: movdqa %xmm0, %xmm2 23201; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 23202; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23203; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 23204; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23205; 
SSE3-NEXT: pand %xmm3, %xmm1 23206; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23207; SSE3-NEXT: por %xmm1, %xmm0 23208; SSE3-NEXT: retq 23209; 23210; SSSE3-LABEL: ugt_18_v2i64: 23211; SSSE3: # %bb.0: 23212; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23213; SSSE3-NEXT: movdqa %xmm0, %xmm2 23214; SSSE3-NEXT: pand %xmm1, %xmm2 23215; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23216; SSSE3-NEXT: movdqa %xmm3, %xmm4 23217; SSSE3-NEXT: pshufb %xmm2, %xmm4 23218; SSSE3-NEXT: psrlw $4, %xmm0 23219; SSSE3-NEXT: pand %xmm1, %xmm0 23220; SSSE3-NEXT: pshufb %xmm0, %xmm3 23221; SSSE3-NEXT: paddb %xmm4, %xmm3 23222; SSSE3-NEXT: pxor %xmm0, %xmm0 23223; SSSE3-NEXT: psadbw %xmm3, %xmm0 23224; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 23225; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 23226; SSSE3-NEXT: movdqa %xmm0, %xmm2 23227; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 23228; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23229; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 23230; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23231; SSSE3-NEXT: pand %xmm3, %xmm1 23232; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23233; SSSE3-NEXT: por %xmm1, %xmm0 23234; SSSE3-NEXT: retq 23235; 23236; SSE41-LABEL: ugt_18_v2i64: 23237; SSE41: # %bb.0: 23238; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23239; SSE41-NEXT: movdqa %xmm0, %xmm2 23240; SSE41-NEXT: pand %xmm1, %xmm2 23241; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23242; SSE41-NEXT: movdqa %xmm3, %xmm4 23243; SSE41-NEXT: pshufb %xmm2, %xmm4 23244; SSE41-NEXT: psrlw $4, %xmm0 23245; SSE41-NEXT: pand %xmm1, %xmm0 23246; SSE41-NEXT: pshufb %xmm0, %xmm3 23247; SSE41-NEXT: paddb %xmm4, %xmm3 23248; SSE41-NEXT: pxor %xmm0, %xmm0 23249; SSE41-NEXT: psadbw %xmm3, %xmm0 23250; SSE41-NEXT: por {{.*}}(%rip), %xmm0 23251; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] 23252; SSE41-NEXT: movdqa %xmm0, 
%xmm2 23253; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 23254; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23255; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 23256; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23257; SSE41-NEXT: pand %xmm3, %xmm1 23258; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23259; SSE41-NEXT: por %xmm1, %xmm0 23260; SSE41-NEXT: retq 23261; 23262; AVX1-LABEL: ugt_18_v2i64: 23263; AVX1: # %bb.0: 23264; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23265; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 23266; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23267; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23268; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 23269; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 23270; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23271; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23272; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 23273; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23274; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23275; AVX1-NEXT: retq 23276; 23277; AVX2-LABEL: ugt_18_v2i64: 23278; AVX2: # %bb.0: 23279; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23280; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 23281; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23282; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23283; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 23284; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 23285; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23286; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23287; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 23288; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23289; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23290; AVX2-NEXT: retq 23291; 23292; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64: 23293; AVX512VPOPCNTDQ: # %bb.0: 23294; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23295; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 23296; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23297; AVX512VPOPCNTDQ-NEXT: vzeroupper 23298; 
AVX512VPOPCNTDQ-NEXT: retq 23299; 23300; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64: 23301; AVX512VPOPCNTDQVL: # %bb.0: 23302; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 23303; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 23304; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23305; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23306; AVX512VPOPCNTDQVL-NEXT: retq 23307; 23308; BITALG_NOVLX-LABEL: ugt_18_v2i64: 23309; BITALG_NOVLX: # %bb.0: 23310; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23311; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 23312; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 23313; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23314; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23315; BITALG_NOVLX-NEXT: vzeroupper 23316; BITALG_NOVLX-NEXT: retq 23317; 23318; BITALG-LABEL: ugt_18_v2i64: 23319; BITALG: # %bb.0: 23320; BITALG-NEXT: vpopcntb %xmm0, %xmm0 23321; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 23322; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23323; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 23324; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23325; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23326; BITALG-NEXT: retq 23327 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 23328 %3 = icmp ugt <2 x i64> %2, <i64 18, i64 18> 23329 %4 = sext <2 x i1> %3 to <2 x i64> 23330 ret <2 x i64> %4 23331} 23332 23333define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { 23334; SSE2-LABEL: ult_19_v2i64: 23335; SSE2: # %bb.0: 23336; SSE2-NEXT: movdqa %xmm0, %xmm1 23337; SSE2-NEXT: psrlw $1, %xmm1 23338; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23339; SSE2-NEXT: psubb %xmm1, %xmm0 23340; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23341; SSE2-NEXT: movdqa %xmm0, %xmm2 23342; SSE2-NEXT: pand %xmm1, %xmm2 23343; SSE2-NEXT: psrlw $2, %xmm0 23344; SSE2-NEXT: pand %xmm1, %xmm0 23345; SSE2-NEXT: paddb %xmm2, %xmm0 23346; SSE2-NEXT: movdqa %xmm0, %xmm1 23347; SSE2-NEXT: psrlw $4, 
%xmm1 23348; SSE2-NEXT: paddb %xmm0, %xmm1 23349; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23350; SSE2-NEXT: pxor %xmm0, %xmm0 23351; SSE2-NEXT: psadbw %xmm1, %xmm0 23352; SSE2-NEXT: por {{.*}}(%rip), %xmm0 23353; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23354; SSE2-NEXT: movdqa %xmm1, %xmm2 23355; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 23356; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23357; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 23358; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23359; SSE2-NEXT: pand %xmm3, %xmm1 23360; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23361; SSE2-NEXT: por %xmm1, %xmm0 23362; SSE2-NEXT: retq 23363; 23364; SSE3-LABEL: ult_19_v2i64: 23365; SSE3: # %bb.0: 23366; SSE3-NEXT: movdqa %xmm0, %xmm1 23367; SSE3-NEXT: psrlw $1, %xmm1 23368; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23369; SSE3-NEXT: psubb %xmm1, %xmm0 23370; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23371; SSE3-NEXT: movdqa %xmm0, %xmm2 23372; SSE3-NEXT: pand %xmm1, %xmm2 23373; SSE3-NEXT: psrlw $2, %xmm0 23374; SSE3-NEXT: pand %xmm1, %xmm0 23375; SSE3-NEXT: paddb %xmm2, %xmm0 23376; SSE3-NEXT: movdqa %xmm0, %xmm1 23377; SSE3-NEXT: psrlw $4, %xmm1 23378; SSE3-NEXT: paddb %xmm0, %xmm1 23379; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23380; SSE3-NEXT: pxor %xmm0, %xmm0 23381; SSE3-NEXT: psadbw %xmm1, %xmm0 23382; SSE3-NEXT: por {{.*}}(%rip), %xmm0 23383; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23384; SSE3-NEXT: movdqa %xmm1, %xmm2 23385; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 23386; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23387; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 23388; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23389; SSE3-NEXT: pand %xmm3, %xmm1 23390; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23391; SSE3-NEXT: por %xmm1, %xmm0 23392; SSE3-NEXT: retq 23393; 23394; SSSE3-LABEL: ult_19_v2i64: 23395; SSSE3: # %bb.0: 23396; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23397; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 23398; SSSE3-NEXT: pand %xmm1, %xmm2 23399; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23400; SSSE3-NEXT: movdqa %xmm3, %xmm4 23401; SSSE3-NEXT: pshufb %xmm2, %xmm4 23402; SSSE3-NEXT: psrlw $4, %xmm0 23403; SSSE3-NEXT: pand %xmm1, %xmm0 23404; SSSE3-NEXT: pshufb %xmm0, %xmm3 23405; SSSE3-NEXT: paddb %xmm4, %xmm3 23406; SSSE3-NEXT: pxor %xmm0, %xmm0 23407; SSSE3-NEXT: psadbw %xmm3, %xmm0 23408; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 23409; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23410; SSSE3-NEXT: movdqa %xmm1, %xmm2 23411; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 23412; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23413; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 23414; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23415; SSSE3-NEXT: pand %xmm3, %xmm1 23416; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23417; SSSE3-NEXT: por %xmm1, %xmm0 23418; SSSE3-NEXT: retq 23419; 23420; SSE41-LABEL: ult_19_v2i64: 23421; SSE41: # %bb.0: 23422; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23423; SSE41-NEXT: movdqa %xmm0, %xmm2 23424; SSE41-NEXT: pand %xmm1, %xmm2 23425; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23426; SSE41-NEXT: movdqa %xmm3, %xmm4 23427; SSE41-NEXT: pshufb %xmm2, %xmm4 23428; SSE41-NEXT: psrlw $4, %xmm0 23429; SSE41-NEXT: pand %xmm1, %xmm0 23430; SSE41-NEXT: pshufb %xmm0, %xmm3 23431; SSE41-NEXT: paddb %xmm4, %xmm3 23432; SSE41-NEXT: pxor %xmm0, %xmm0 23433; SSE41-NEXT: psadbw %xmm3, %xmm0 23434; SSE41-NEXT: por {{.*}}(%rip), %xmm0 23435; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23436; SSE41-NEXT: movdqa %xmm1, %xmm2 23437; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 23438; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23439; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 23440; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23441; SSE41-NEXT: pand %xmm3, %xmm1 23442; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23443; 
SSE41-NEXT: por %xmm1, %xmm0 23444; SSE41-NEXT: retq 23445; 23446; AVX1-LABEL: ult_19_v2i64: 23447; AVX1: # %bb.0: 23448; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23449; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 23450; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23451; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23452; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 23453; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 23454; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23455; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23456; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 23457; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23458; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] 23459; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23460; AVX1-NEXT: retq 23461; 23462; AVX2-LABEL: ult_19_v2i64: 23463; AVX2: # %bb.0: 23464; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23465; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 23466; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23467; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23468; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 23469; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 23470; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23471; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23472; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 23473; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23474; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] 23475; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23476; AVX2-NEXT: retq 23477; 23478; AVX512VPOPCNTDQ-LABEL: ult_19_v2i64: 23479; AVX512VPOPCNTDQ: # %bb.0: 23480; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23481; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 23482; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] 23483; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23484; AVX512VPOPCNTDQ-NEXT: vzeroupper 23485; AVX512VPOPCNTDQ-NEXT: retq 23486; 23487; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64: 23488; AVX512VPOPCNTDQVL: # %bb.0: 23489; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 23490; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 23491; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23492; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23493; AVX512VPOPCNTDQVL-NEXT: retq 23494; 23495; BITALG_NOVLX-LABEL: ult_19_v2i64: 23496; BITALG_NOVLX: # %bb.0: 23497; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23498; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 23499; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 23500; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23501; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] 23502; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23503; BITALG_NOVLX-NEXT: vzeroupper 23504; BITALG_NOVLX-NEXT: retq 23505; 23506; BITALG-LABEL: ult_19_v2i64: 23507; BITALG: # %bb.0: 23508; BITALG-NEXT: vpopcntb %xmm0, %xmm0 23509; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 23510; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23511; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 23512; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23513; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23514; BITALG-NEXT: retq 23515 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 23516 %3 = icmp ult <2 x i64> %2, <i64 19, i64 19> 23517 %4 = sext <2 x i1> %3 to <2 x i64> 23518 ret <2 x i64> %4 23519} 23520 23521define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { 23522; SSE2-LABEL: ugt_19_v2i64: 23523; SSE2: # %bb.0: 23524; SSE2-NEXT: movdqa %xmm0, %xmm1 23525; SSE2-NEXT: psrlw $1, %xmm1 23526; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23527; SSE2-NEXT: psubb %xmm1, %xmm0 23528; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23529; SSE2-NEXT: movdqa %xmm0, %xmm2 23530; SSE2-NEXT: pand %xmm1, %xmm2 23531; SSE2-NEXT: psrlw $2, %xmm0 23532; SSE2-NEXT: pand %xmm1, %xmm0 23533; SSE2-NEXT: paddb %xmm2, %xmm0 23534; SSE2-NEXT: movdqa %xmm0, %xmm1 23535; SSE2-NEXT: psrlw $4, %xmm1 23536; SSE2-NEXT: paddb %xmm0, %xmm1 23537; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23538; SSE2-NEXT: pxor %xmm0, 
%xmm0 23539; SSE2-NEXT: psadbw %xmm1, %xmm0 23540; SSE2-NEXT: por {{.*}}(%rip), %xmm0 23541; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23542; SSE2-NEXT: movdqa %xmm0, %xmm2 23543; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 23544; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23545; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 23546; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23547; SSE2-NEXT: pand %xmm3, %xmm1 23548; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23549; SSE2-NEXT: por %xmm1, %xmm0 23550; SSE2-NEXT: retq 23551; 23552; SSE3-LABEL: ugt_19_v2i64: 23553; SSE3: # %bb.0: 23554; SSE3-NEXT: movdqa %xmm0, %xmm1 23555; SSE3-NEXT: psrlw $1, %xmm1 23556; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23557; SSE3-NEXT: psubb %xmm1, %xmm0 23558; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23559; SSE3-NEXT: movdqa %xmm0, %xmm2 23560; SSE3-NEXT: pand %xmm1, %xmm2 23561; SSE3-NEXT: psrlw $2, %xmm0 23562; SSE3-NEXT: pand %xmm1, %xmm0 23563; SSE3-NEXT: paddb %xmm2, %xmm0 23564; SSE3-NEXT: movdqa %xmm0, %xmm1 23565; SSE3-NEXT: psrlw $4, %xmm1 23566; SSE3-NEXT: paddb %xmm0, %xmm1 23567; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23568; SSE3-NEXT: pxor %xmm0, %xmm0 23569; SSE3-NEXT: psadbw %xmm1, %xmm0 23570; SSE3-NEXT: por {{.*}}(%rip), %xmm0 23571; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23572; SSE3-NEXT: movdqa %xmm0, %xmm2 23573; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 23574; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23575; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 23576; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23577; SSE3-NEXT: pand %xmm3, %xmm1 23578; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23579; SSE3-NEXT: por %xmm1, %xmm0 23580; SSE3-NEXT: retq 23581; 23582; SSSE3-LABEL: ugt_19_v2i64: 23583; SSSE3: # %bb.0: 23584; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23585; SSSE3-NEXT: movdqa %xmm0, %xmm2 23586; SSSE3-NEXT: pand %xmm1, %xmm2 23587; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23588; SSSE3-NEXT: movdqa %xmm3, %xmm4 23589; SSSE3-NEXT: pshufb %xmm2, %xmm4 23590; SSSE3-NEXT: psrlw $4, %xmm0 23591; SSSE3-NEXT: pand %xmm1, %xmm0 23592; SSSE3-NEXT: pshufb %xmm0, %xmm3 23593; SSSE3-NEXT: paddb %xmm4, %xmm3 23594; SSSE3-NEXT: pxor %xmm0, %xmm0 23595; SSSE3-NEXT: psadbw %xmm3, %xmm0 23596; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 23597; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23598; SSSE3-NEXT: movdqa %xmm0, %xmm2 23599; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 23600; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23601; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 23602; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23603; SSSE3-NEXT: pand %xmm3, %xmm1 23604; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23605; SSSE3-NEXT: por %xmm1, %xmm0 23606; SSSE3-NEXT: retq 23607; 23608; SSE41-LABEL: ugt_19_v2i64: 23609; SSE41: # %bb.0: 23610; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23611; SSE41-NEXT: movdqa %xmm0, %xmm2 23612; SSE41-NEXT: pand %xmm1, %xmm2 23613; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23614; SSE41-NEXT: movdqa %xmm3, %xmm4 23615; SSE41-NEXT: pshufb %xmm2, %xmm4 23616; SSE41-NEXT: psrlw $4, %xmm0 23617; SSE41-NEXT: pand %xmm1, %xmm0 23618; SSE41-NEXT: pshufb %xmm0, %xmm3 23619; SSE41-NEXT: paddb %xmm4, %xmm3 23620; SSE41-NEXT: pxor %xmm0, %xmm0 23621; SSE41-NEXT: psadbw %xmm3, %xmm0 23622; SSE41-NEXT: por {{.*}}(%rip), %xmm0 23623; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] 23624; SSE41-NEXT: movdqa %xmm0, %xmm2 23625; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 23626; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23627; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 23628; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23629; SSE41-NEXT: pand %xmm3, %xmm1 23630; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23631; SSE41-NEXT: por %xmm1, %xmm0 23632; SSE41-NEXT: retq 23633; 23634; AVX1-LABEL: ugt_19_v2i64: 23635; AVX1: # %bb.0: 
23636; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23637; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 23638; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23639; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23640; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 23641; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 23642; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23643; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23644; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 23645; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23646; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23647; AVX1-NEXT: retq 23648; 23649; AVX2-LABEL: ugt_19_v2i64: 23650; AVX2: # %bb.0: 23651; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23652; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 23653; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23654; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23655; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 23656; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 23657; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23658; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23659; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 23660; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23661; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23662; AVX2-NEXT: retq 23663; 23664; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64: 23665; AVX512VPOPCNTDQ: # %bb.0: 23666; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23667; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 23668; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23669; AVX512VPOPCNTDQ-NEXT: vzeroupper 23670; AVX512VPOPCNTDQ-NEXT: retq 23671; 23672; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64: 23673; AVX512VPOPCNTDQVL: # %bb.0: 23674; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 23675; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 23676; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23677; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23678; AVX512VPOPCNTDQVL-NEXT: retq 23679; 23680; 
BITALG_NOVLX-LABEL: ugt_19_v2i64: 23681; BITALG_NOVLX: # %bb.0: 23682; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23683; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 23684; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 23685; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23686; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 23687; BITALG_NOVLX-NEXT: vzeroupper 23688; BITALG_NOVLX-NEXT: retq 23689; 23690; BITALG-LABEL: ugt_19_v2i64: 23691; BITALG: # %bb.0: 23692; BITALG-NEXT: vpopcntb %xmm0, %xmm0 23693; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 23694; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23695; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 23696; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23697; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23698; BITALG-NEXT: retq 23699 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 23700 %3 = icmp ugt <2 x i64> %2, <i64 19, i64 19> 23701 %4 = sext <2 x i1> %3 to <2 x i64> 23702 ret <2 x i64> %4 23703} 23704 23705define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { 23706; SSE2-LABEL: ult_20_v2i64: 23707; SSE2: # %bb.0: 23708; SSE2-NEXT: movdqa %xmm0, %xmm1 23709; SSE2-NEXT: psrlw $1, %xmm1 23710; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23711; SSE2-NEXT: psubb %xmm1, %xmm0 23712; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23713; SSE2-NEXT: movdqa %xmm0, %xmm2 23714; SSE2-NEXT: pand %xmm1, %xmm2 23715; SSE2-NEXT: psrlw $2, %xmm0 23716; SSE2-NEXT: pand %xmm1, %xmm0 23717; SSE2-NEXT: paddb %xmm2, %xmm0 23718; SSE2-NEXT: movdqa %xmm0, %xmm1 23719; SSE2-NEXT: psrlw $4, %xmm1 23720; SSE2-NEXT: paddb %xmm0, %xmm1 23721; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23722; SSE2-NEXT: pxor %xmm0, %xmm0 23723; SSE2-NEXT: psadbw %xmm1, %xmm0 23724; SSE2-NEXT: por {{.*}}(%rip), %xmm0 23725; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] 23726; SSE2-NEXT: movdqa %xmm1, %xmm2 23727; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 23728; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23729; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 23730; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23731; SSE2-NEXT: pand %xmm3, %xmm1 23732; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23733; SSE2-NEXT: por %xmm1, %xmm0 23734; SSE2-NEXT: retq 23735; 23736; SSE3-LABEL: ult_20_v2i64: 23737; SSE3: # %bb.0: 23738; SSE3-NEXT: movdqa %xmm0, %xmm1 23739; SSE3-NEXT: psrlw $1, %xmm1 23740; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23741; SSE3-NEXT: psubb %xmm1, %xmm0 23742; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23743; SSE3-NEXT: movdqa %xmm0, %xmm2 23744; SSE3-NEXT: pand %xmm1, %xmm2 23745; SSE3-NEXT: psrlw $2, %xmm0 23746; SSE3-NEXT: pand %xmm1, %xmm0 23747; SSE3-NEXT: paddb %xmm2, %xmm0 23748; SSE3-NEXT: movdqa %xmm0, %xmm1 23749; SSE3-NEXT: psrlw $4, %xmm1 23750; SSE3-NEXT: paddb %xmm0, %xmm1 23751; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23752; SSE3-NEXT: pxor %xmm0, %xmm0 23753; SSE3-NEXT: psadbw %xmm1, %xmm0 23754; SSE3-NEXT: por {{.*}}(%rip), %xmm0 23755; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] 23756; SSE3-NEXT: movdqa %xmm1, %xmm2 23757; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 23758; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23759; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 23760; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23761; SSE3-NEXT: pand %xmm3, %xmm1 23762; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23763; SSE3-NEXT: por %xmm1, %xmm0 23764; SSE3-NEXT: retq 23765; 23766; SSSE3-LABEL: ult_20_v2i64: 23767; SSSE3: # %bb.0: 23768; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23769; SSSE3-NEXT: movdqa %xmm0, %xmm2 23770; SSSE3-NEXT: pand %xmm1, %xmm2 23771; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23772; SSSE3-NEXT: movdqa %xmm3, %xmm4 23773; SSSE3-NEXT: pshufb %xmm2, %xmm4 23774; SSSE3-NEXT: psrlw $4, %xmm0 23775; SSSE3-NEXT: pand %xmm1, %xmm0 23776; SSSE3-NEXT: pshufb %xmm0, %xmm3 23777; SSSE3-NEXT: paddb %xmm4, %xmm3 23778; SSSE3-NEXT: pxor %xmm0, 
%xmm0 23779; SSSE3-NEXT: psadbw %xmm3, %xmm0 23780; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 23781; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] 23782; SSSE3-NEXT: movdqa %xmm1, %xmm2 23783; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 23784; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23785; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 23786; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23787; SSSE3-NEXT: pand %xmm3, %xmm1 23788; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23789; SSSE3-NEXT: por %xmm1, %xmm0 23790; SSSE3-NEXT: retq 23791; 23792; SSE41-LABEL: ult_20_v2i64: 23793; SSE41: # %bb.0: 23794; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23795; SSE41-NEXT: movdqa %xmm0, %xmm2 23796; SSE41-NEXT: pand %xmm1, %xmm2 23797; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23798; SSE41-NEXT: movdqa %xmm3, %xmm4 23799; SSE41-NEXT: pshufb %xmm2, %xmm4 23800; SSE41-NEXT: psrlw $4, %xmm0 23801; SSE41-NEXT: pand %xmm1, %xmm0 23802; SSE41-NEXT: pshufb %xmm0, %xmm3 23803; SSE41-NEXT: paddb %xmm4, %xmm3 23804; SSE41-NEXT: pxor %xmm0, %xmm0 23805; SSE41-NEXT: psadbw %xmm3, %xmm0 23806; SSE41-NEXT: por {{.*}}(%rip), %xmm0 23807; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] 23808; SSE41-NEXT: movdqa %xmm1, %xmm2 23809; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 23810; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23811; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 23812; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23813; SSE41-NEXT: pand %xmm3, %xmm1 23814; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23815; SSE41-NEXT: por %xmm1, %xmm0 23816; SSE41-NEXT: retq 23817; 23818; AVX1-LABEL: ult_20_v2i64: 23819; AVX1: # %bb.0: 23820; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23821; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 23822; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23823; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23824; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 23825; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 23826; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23827; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23828; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 23829; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23830; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] 23831; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23832; AVX1-NEXT: retq 23833; 23834; AVX2-LABEL: ult_20_v2i64: 23835; AVX2: # %bb.0: 23836; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23837; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 23838; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23839; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 23840; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 23841; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 23842; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 23843; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 23844; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 23845; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23846; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] 23847; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23848; AVX2-NEXT: retq 23849; 23850; AVX512VPOPCNTDQ-LABEL: ult_20_v2i64: 23851; AVX512VPOPCNTDQ: # %bb.0: 23852; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23853; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 23854; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] 23855; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23856; AVX512VPOPCNTDQ-NEXT: vzeroupper 23857; AVX512VPOPCNTDQ-NEXT: retq 23858; 23859; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64: 23860; AVX512VPOPCNTDQVL: # %bb.0: 23861; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 23862; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 23863; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23864; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23865; AVX512VPOPCNTDQVL-NEXT: retq 23866; 23867; BITALG_NOVLX-LABEL: ult_20_v2i64: 23868; BITALG_NOVLX: # %bb.0: 23869; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 23870; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 23871; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 23872; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23873; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] 23874; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 23875; BITALG_NOVLX-NEXT: vzeroupper 23876; BITALG_NOVLX-NEXT: retq 23877; 23878; BITALG-LABEL: ult_20_v2i64: 23879; BITALG: # %bb.0: 23880; BITALG-NEXT: vpopcntb %xmm0, %xmm0 23881; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 23882; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 23883; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 23884; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 23885; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 23886; BITALG-NEXT: retq 23887 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 23888 %3 = icmp ult <2 x i64> %2, <i64 20, i64 20> 23889 %4 = sext <2 x i1> %3 to <2 x i64> 23890 ret <2 x i64> %4 23891} 23892 23893define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { 23894; SSE2-LABEL: ugt_20_v2i64: 23895; SSE2: # %bb.0: 23896; SSE2-NEXT: movdqa %xmm0, %xmm1 23897; SSE2-NEXT: psrlw $1, %xmm1 23898; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23899; SSE2-NEXT: psubb %xmm1, %xmm0 23900; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23901; SSE2-NEXT: movdqa %xmm0, %xmm2 23902; SSE2-NEXT: pand %xmm1, %xmm2 23903; SSE2-NEXT: psrlw $2, %xmm0 23904; SSE2-NEXT: pand %xmm1, %xmm0 23905; SSE2-NEXT: paddb %xmm2, %xmm0 23906; SSE2-NEXT: movdqa %xmm0, %xmm1 23907; SSE2-NEXT: psrlw $4, %xmm1 23908; SSE2-NEXT: paddb %xmm0, %xmm1 23909; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 23910; SSE2-NEXT: pxor %xmm0, %xmm0 23911; SSE2-NEXT: psadbw %xmm1, %xmm0 23912; SSE2-NEXT: por {{.*}}(%rip), %xmm0 23913; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] 23914; SSE2-NEXT: movdqa %xmm0, %xmm2 23915; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 23916; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23917; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 23918; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23919; SSE2-NEXT: pand 
%xmm3, %xmm1 23920; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23921; SSE2-NEXT: por %xmm1, %xmm0 23922; SSE2-NEXT: retq 23923; 23924; SSE3-LABEL: ugt_20_v2i64: 23925; SSE3: # %bb.0: 23926; SSE3-NEXT: movdqa %xmm0, %xmm1 23927; SSE3-NEXT: psrlw $1, %xmm1 23928; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23929; SSE3-NEXT: psubb %xmm1, %xmm0 23930; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 23931; SSE3-NEXT: movdqa %xmm0, %xmm2 23932; SSE3-NEXT: pand %xmm1, %xmm2 23933; SSE3-NEXT: psrlw $2, %xmm0 23934; SSE3-NEXT: pand %xmm1, %xmm0 23935; SSE3-NEXT: paddb %xmm2, %xmm0 23936; SSE3-NEXT: movdqa %xmm0, %xmm1 23937; SSE3-NEXT: psrlw $4, %xmm1 23938; SSE3-NEXT: paddb %xmm0, %xmm1 23939; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 23940; SSE3-NEXT: pxor %xmm0, %xmm0 23941; SSE3-NEXT: psadbw %xmm1, %xmm0 23942; SSE3-NEXT: por {{.*}}(%rip), %xmm0 23943; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] 23944; SSE3-NEXT: movdqa %xmm0, %xmm2 23945; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 23946; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23947; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 23948; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23949; SSE3-NEXT: pand %xmm3, %xmm1 23950; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23951; SSE3-NEXT: por %xmm1, %xmm0 23952; SSE3-NEXT: retq 23953; 23954; SSSE3-LABEL: ugt_20_v2i64: 23955; SSSE3: # %bb.0: 23956; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23957; SSSE3-NEXT: movdqa %xmm0, %xmm2 23958; SSSE3-NEXT: pand %xmm1, %xmm2 23959; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23960; SSSE3-NEXT: movdqa %xmm3, %xmm4 23961; SSSE3-NEXT: pshufb %xmm2, %xmm4 23962; SSSE3-NEXT: psrlw $4, %xmm0 23963; SSSE3-NEXT: pand %xmm1, %xmm0 23964; SSSE3-NEXT: pshufb %xmm0, %xmm3 23965; SSSE3-NEXT: paddb %xmm4, %xmm3 23966; SSSE3-NEXT: pxor %xmm0, %xmm0 23967; SSSE3-NEXT: psadbw %xmm3, %xmm0 23968; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 23969; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483668,2147483668] 23970; SSSE3-NEXT: movdqa %xmm0, %xmm2 23971; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 23972; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23973; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 23974; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 23975; SSSE3-NEXT: pand %xmm3, %xmm1 23976; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 23977; SSSE3-NEXT: por %xmm1, %xmm0 23978; SSSE3-NEXT: retq 23979; 23980; SSE41-LABEL: ugt_20_v2i64: 23981; SSE41: # %bb.0: 23982; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 23983; SSE41-NEXT: movdqa %xmm0, %xmm2 23984; SSE41-NEXT: pand %xmm1, %xmm2 23985; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 23986; SSE41-NEXT: movdqa %xmm3, %xmm4 23987; SSE41-NEXT: pshufb %xmm2, %xmm4 23988; SSE41-NEXT: psrlw $4, %xmm0 23989; SSE41-NEXT: pand %xmm1, %xmm0 23990; SSE41-NEXT: pshufb %xmm0, %xmm3 23991; SSE41-NEXT: paddb %xmm4, %xmm3 23992; SSE41-NEXT: pxor %xmm0, %xmm0 23993; SSE41-NEXT: psadbw %xmm3, %xmm0 23994; SSE41-NEXT: por {{.*}}(%rip), %xmm0 23995; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] 23996; SSE41-NEXT: movdqa %xmm0, %xmm2 23997; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 23998; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 23999; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 24000; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24001; SSE41-NEXT: pand %xmm3, %xmm1 24002; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24003; SSE41-NEXT: por %xmm1, %xmm0 24004; SSE41-NEXT: retq 24005; 24006; AVX1-LABEL: ugt_20_v2i64: 24007; AVX1: # %bb.0: 24008; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24009; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 24010; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24011; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24012; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 24013; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 24014; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24015; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 24016; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 24017; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24018; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24019; AVX1-NEXT: retq 24020; 24021; AVX2-LABEL: ugt_20_v2i64: 24022; AVX2: # %bb.0: 24023; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24024; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 24025; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24026; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24027; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 24028; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 24029; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24030; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24031; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 24032; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24033; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24034; AVX2-NEXT: retq 24035; 24036; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64: 24037; AVX512VPOPCNTDQ: # %bb.0: 24038; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24039; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 24040; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24041; AVX512VPOPCNTDQ-NEXT: vzeroupper 24042; AVX512VPOPCNTDQ-NEXT: retq 24043; 24044; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64: 24045; AVX512VPOPCNTDQVL: # %bb.0: 24046; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 24047; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 24048; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24049; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24050; AVX512VPOPCNTDQVL-NEXT: retq 24051; 24052; BITALG_NOVLX-LABEL: ugt_20_v2i64: 24053; BITALG_NOVLX: # %bb.0: 24054; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24055; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 24056; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 24057; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24058; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24059; BITALG_NOVLX-NEXT: vzeroupper 24060; BITALG_NOVLX-NEXT: retq 
24061; 24062; BITALG-LABEL: ugt_20_v2i64: 24063; BITALG: # %bb.0: 24064; BITALG-NEXT: vpopcntb %xmm0, %xmm0 24065; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 24066; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24067; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 24068; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24069; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24070; BITALG-NEXT: retq 24071 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 24072 %3 = icmp ugt <2 x i64> %2, <i64 20, i64 20> 24073 %4 = sext <2 x i1> %3 to <2 x i64> 24074 ret <2 x i64> %4 24075} 24076 24077define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { 24078; SSE2-LABEL: ult_21_v2i64: 24079; SSE2: # %bb.0: 24080; SSE2-NEXT: movdqa %xmm0, %xmm1 24081; SSE2-NEXT: psrlw $1, %xmm1 24082; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24083; SSE2-NEXT: psubb %xmm1, %xmm0 24084; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24085; SSE2-NEXT: movdqa %xmm0, %xmm2 24086; SSE2-NEXT: pand %xmm1, %xmm2 24087; SSE2-NEXT: psrlw $2, %xmm0 24088; SSE2-NEXT: pand %xmm1, %xmm0 24089; SSE2-NEXT: paddb %xmm2, %xmm0 24090; SSE2-NEXT: movdqa %xmm0, %xmm1 24091; SSE2-NEXT: psrlw $4, %xmm1 24092; SSE2-NEXT: paddb %xmm0, %xmm1 24093; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24094; SSE2-NEXT: pxor %xmm0, %xmm0 24095; SSE2-NEXT: psadbw %xmm1, %xmm0 24096; SSE2-NEXT: por {{.*}}(%rip), %xmm0 24097; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24098; SSE2-NEXT: movdqa %xmm1, %xmm2 24099; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 24100; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24101; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 24102; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24103; SSE2-NEXT: pand %xmm3, %xmm1 24104; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24105; SSE2-NEXT: por %xmm1, %xmm0 24106; SSE2-NEXT: retq 24107; 24108; SSE3-LABEL: ult_21_v2i64: 24109; SSE3: # %bb.0: 24110; SSE3-NEXT: movdqa %xmm0, %xmm1 24111; SSE3-NEXT: psrlw $1, %xmm1 24112; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24113; 
SSE3-NEXT: psubb %xmm1, %xmm0 24114; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24115; SSE3-NEXT: movdqa %xmm0, %xmm2 24116; SSE3-NEXT: pand %xmm1, %xmm2 24117; SSE3-NEXT: psrlw $2, %xmm0 24118; SSE3-NEXT: pand %xmm1, %xmm0 24119; SSE3-NEXT: paddb %xmm2, %xmm0 24120; SSE3-NEXT: movdqa %xmm0, %xmm1 24121; SSE3-NEXT: psrlw $4, %xmm1 24122; SSE3-NEXT: paddb %xmm0, %xmm1 24123; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24124; SSE3-NEXT: pxor %xmm0, %xmm0 24125; SSE3-NEXT: psadbw %xmm1, %xmm0 24126; SSE3-NEXT: por {{.*}}(%rip), %xmm0 24127; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24128; SSE3-NEXT: movdqa %xmm1, %xmm2 24129; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 24130; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24131; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 24132; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24133; SSE3-NEXT: pand %xmm3, %xmm1 24134; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24135; SSE3-NEXT: por %xmm1, %xmm0 24136; SSE3-NEXT: retq 24137; 24138; SSSE3-LABEL: ult_21_v2i64: 24139; SSSE3: # %bb.0: 24140; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24141; SSSE3-NEXT: movdqa %xmm0, %xmm2 24142; SSSE3-NEXT: pand %xmm1, %xmm2 24143; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24144; SSSE3-NEXT: movdqa %xmm3, %xmm4 24145; SSSE3-NEXT: pshufb %xmm2, %xmm4 24146; SSSE3-NEXT: psrlw $4, %xmm0 24147; SSSE3-NEXT: pand %xmm1, %xmm0 24148; SSSE3-NEXT: pshufb %xmm0, %xmm3 24149; SSSE3-NEXT: paddb %xmm4, %xmm3 24150; SSSE3-NEXT: pxor %xmm0, %xmm0 24151; SSSE3-NEXT: psadbw %xmm3, %xmm0 24152; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 24153; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24154; SSSE3-NEXT: movdqa %xmm1, %xmm2 24155; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 24156; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24157; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 24158; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24159; SSSE3-NEXT: pand %xmm3, %xmm1 
24160; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24161; SSSE3-NEXT: por %xmm1, %xmm0 24162; SSSE3-NEXT: retq 24163; 24164; SSE41-LABEL: ult_21_v2i64: 24165; SSE41: # %bb.0: 24166; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24167; SSE41-NEXT: movdqa %xmm0, %xmm2 24168; SSE41-NEXT: pand %xmm1, %xmm2 24169; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24170; SSE41-NEXT: movdqa %xmm3, %xmm4 24171; SSE41-NEXT: pshufb %xmm2, %xmm4 24172; SSE41-NEXT: psrlw $4, %xmm0 24173; SSE41-NEXT: pand %xmm1, %xmm0 24174; SSE41-NEXT: pshufb %xmm0, %xmm3 24175; SSE41-NEXT: paddb %xmm4, %xmm3 24176; SSE41-NEXT: pxor %xmm0, %xmm0 24177; SSE41-NEXT: psadbw %xmm3, %xmm0 24178; SSE41-NEXT: por {{.*}}(%rip), %xmm0 24179; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24180; SSE41-NEXT: movdqa %xmm1, %xmm2 24181; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 24182; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24183; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 24184; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24185; SSE41-NEXT: pand %xmm3, %xmm1 24186; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24187; SSE41-NEXT: por %xmm1, %xmm0 24188; SSE41-NEXT: retq 24189; 24190; AVX1-LABEL: ult_21_v2i64: 24191; AVX1: # %bb.0: 24192; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24193; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 24194; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24195; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24196; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 24197; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 24198; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24199; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24200; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 24201; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24202; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] 24203; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24204; AVX1-NEXT: retq 24205; 24206; AVX2-LABEL: ult_21_v2i64: 24207; AVX2: # %bb.0: 24208; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24209; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 24210; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24211; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24212; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 24213; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 24214; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24215; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24216; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 24217; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24218; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] 24219; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24220; AVX2-NEXT: retq 24221; 24222; AVX512VPOPCNTDQ-LABEL: ult_21_v2i64: 24223; AVX512VPOPCNTDQ: # %bb.0: 24224; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24225; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 24226; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] 24227; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24228; AVX512VPOPCNTDQ-NEXT: vzeroupper 24229; AVX512VPOPCNTDQ-NEXT: retq 24230; 24231; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64: 24232; AVX512VPOPCNTDQVL: # %bb.0: 24233; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 24234; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 24235; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24236; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24237; AVX512VPOPCNTDQVL-NEXT: retq 24238; 24239; BITALG_NOVLX-LABEL: ult_21_v2i64: 24240; BITALG_NOVLX: # %bb.0: 24241; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24242; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 24243; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 24244; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24245; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] 24246; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24247; BITALG_NOVLX-NEXT: vzeroupper 24248; BITALG_NOVLX-NEXT: retq 24249; 24250; BITALG-LABEL: ult_21_v2i64: 24251; BITALG: # %bb.0: 24252; BITALG-NEXT: vpopcntb %xmm0, %xmm0 24253; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 24254; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24255; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 24256; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24257; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24258; BITALG-NEXT: retq 24259 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 24260 %3 = icmp ult <2 x i64> %2, <i64 21, i64 21> 24261 %4 = sext <2 x i1> %3 to <2 x i64> 24262 ret <2 x i64> %4 24263} 24264 24265define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { 24266; SSE2-LABEL: ugt_21_v2i64: 24267; SSE2: # %bb.0: 24268; SSE2-NEXT: movdqa %xmm0, %xmm1 24269; SSE2-NEXT: psrlw $1, %xmm1 24270; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24271; SSE2-NEXT: psubb %xmm1, %xmm0 24272; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24273; SSE2-NEXT: movdqa %xmm0, %xmm2 24274; SSE2-NEXT: pand %xmm1, %xmm2 24275; SSE2-NEXT: psrlw $2, %xmm0 24276; SSE2-NEXT: pand %xmm1, %xmm0 24277; SSE2-NEXT: paddb %xmm2, %xmm0 24278; SSE2-NEXT: movdqa %xmm0, %xmm1 24279; SSE2-NEXT: psrlw $4, %xmm1 24280; SSE2-NEXT: paddb %xmm0, %xmm1 24281; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24282; SSE2-NEXT: pxor %xmm0, %xmm0 24283; SSE2-NEXT: psadbw %xmm1, %xmm0 24284; SSE2-NEXT: por {{.*}}(%rip), %xmm0 24285; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24286; SSE2-NEXT: movdqa %xmm0, %xmm2 24287; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 24288; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24289; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 24290; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24291; SSE2-NEXT: pand %xmm3, %xmm1 24292; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24293; SSE2-NEXT: por %xmm1, %xmm0 24294; SSE2-NEXT: retq 24295; 24296; SSE3-LABEL: ugt_21_v2i64: 24297; SSE3: # %bb.0: 24298; SSE3-NEXT: movdqa %xmm0, %xmm1 24299; SSE3-NEXT: psrlw $1, %xmm1 24300; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24301; SSE3-NEXT: psubb %xmm1, %xmm0 24302; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24303; SSE3-NEXT: movdqa %xmm0, %xmm2 24304; SSE3-NEXT: pand %xmm1, %xmm2 24305; SSE3-NEXT: psrlw $2, %xmm0 24306; SSE3-NEXT: pand %xmm1, %xmm0 24307; SSE3-NEXT: paddb %xmm2, %xmm0 24308; SSE3-NEXT: movdqa %xmm0, %xmm1 24309; SSE3-NEXT: psrlw $4, %xmm1 24310; SSE3-NEXT: paddb %xmm0, %xmm1 24311; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24312; SSE3-NEXT: pxor %xmm0, %xmm0 24313; SSE3-NEXT: psadbw %xmm1, %xmm0 24314; SSE3-NEXT: por {{.*}}(%rip), %xmm0 24315; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24316; SSE3-NEXT: movdqa %xmm0, %xmm2 24317; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 24318; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24319; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 24320; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24321; SSE3-NEXT: pand %xmm3, %xmm1 24322; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24323; SSE3-NEXT: por %xmm1, %xmm0 24324; SSE3-NEXT: retq 24325; 24326; SSSE3-LABEL: ugt_21_v2i64: 24327; SSSE3: # %bb.0: 24328; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24329; SSSE3-NEXT: movdqa %xmm0, %xmm2 24330; SSSE3-NEXT: pand %xmm1, %xmm2 24331; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24332; SSSE3-NEXT: movdqa %xmm3, %xmm4 24333; SSSE3-NEXT: pshufb %xmm2, %xmm4 24334; SSSE3-NEXT: psrlw $4, %xmm0 24335; SSSE3-NEXT: pand %xmm1, %xmm0 24336; SSSE3-NEXT: pshufb %xmm0, %xmm3 24337; SSSE3-NEXT: paddb %xmm4, %xmm3 24338; SSSE3-NEXT: pxor %xmm0, %xmm0 24339; SSSE3-NEXT: psadbw %xmm3, %xmm0 24340; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 24341; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24342; SSSE3-NEXT: movdqa %xmm0, %xmm2 24343; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 24344; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24345; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 24346; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24347; SSSE3-NEXT: pand %xmm3, %xmm1 24348; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24349; 
SSSE3-NEXT: por %xmm1, %xmm0 24350; SSSE3-NEXT: retq 24351; 24352; SSE41-LABEL: ugt_21_v2i64: 24353; SSE41: # %bb.0: 24354; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24355; SSE41-NEXT: movdqa %xmm0, %xmm2 24356; SSE41-NEXT: pand %xmm1, %xmm2 24357; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24358; SSE41-NEXT: movdqa %xmm3, %xmm4 24359; SSE41-NEXT: pshufb %xmm2, %xmm4 24360; SSE41-NEXT: psrlw $4, %xmm0 24361; SSE41-NEXT: pand %xmm1, %xmm0 24362; SSE41-NEXT: pshufb %xmm0, %xmm3 24363; SSE41-NEXT: paddb %xmm4, %xmm3 24364; SSE41-NEXT: pxor %xmm0, %xmm0 24365; SSE41-NEXT: psadbw %xmm3, %xmm0 24366; SSE41-NEXT: por {{.*}}(%rip), %xmm0 24367; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] 24368; SSE41-NEXT: movdqa %xmm0, %xmm2 24369; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 24370; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24371; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 24372; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24373; SSE41-NEXT: pand %xmm3, %xmm1 24374; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24375; SSE41-NEXT: por %xmm1, %xmm0 24376; SSE41-NEXT: retq 24377; 24378; AVX1-LABEL: ugt_21_v2i64: 24379; AVX1: # %bb.0: 24380; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24381; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 24382; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24383; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24384; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 24385; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 24386; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24387; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24388; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 24389; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24390; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24391; AVX1-NEXT: retq 24392; 24393; AVX2-LABEL: ugt_21_v2i64: 24394; AVX2: # %bb.0: 24395; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24396; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 24397; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24398; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24399; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 24400; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 24401; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24402; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24403; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 24404; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24405; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24406; AVX2-NEXT: retq 24407; 24408; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64: 24409; AVX512VPOPCNTDQ: # %bb.0: 24410; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24411; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 24412; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24413; AVX512VPOPCNTDQ-NEXT: vzeroupper 24414; AVX512VPOPCNTDQ-NEXT: retq 24415; 24416; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64: 24417; AVX512VPOPCNTDQVL: # %bb.0: 24418; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 24419; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 24420; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24421; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24422; AVX512VPOPCNTDQVL-NEXT: retq 24423; 24424; BITALG_NOVLX-LABEL: ugt_21_v2i64: 24425; BITALG_NOVLX: # %bb.0: 24426; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24427; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 24428; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 24429; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24430; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24431; BITALG_NOVLX-NEXT: vzeroupper 24432; BITALG_NOVLX-NEXT: retq 24433; 24434; BITALG-LABEL: ugt_21_v2i64: 24435; BITALG: # %bb.0: 24436; BITALG-NEXT: vpopcntb %xmm0, %xmm0 24437; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 24438; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24439; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 24440; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24441; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24442; 
BITALG-NEXT: retq 24443 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 24444 %3 = icmp ugt <2 x i64> %2, <i64 21, i64 21> 24445 %4 = sext <2 x i1> %3 to <2 x i64> 24446 ret <2 x i64> %4 24447} 24448 24449define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { 24450; SSE2-LABEL: ult_22_v2i64: 24451; SSE2: # %bb.0: 24452; SSE2-NEXT: movdqa %xmm0, %xmm1 24453; SSE2-NEXT: psrlw $1, %xmm1 24454; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24455; SSE2-NEXT: psubb %xmm1, %xmm0 24456; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24457; SSE2-NEXT: movdqa %xmm0, %xmm2 24458; SSE2-NEXT: pand %xmm1, %xmm2 24459; SSE2-NEXT: psrlw $2, %xmm0 24460; SSE2-NEXT: pand %xmm1, %xmm0 24461; SSE2-NEXT: paddb %xmm2, %xmm0 24462; SSE2-NEXT: movdqa %xmm0, %xmm1 24463; SSE2-NEXT: psrlw $4, %xmm1 24464; SSE2-NEXT: paddb %xmm0, %xmm1 24465; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24466; SSE2-NEXT: pxor %xmm0, %xmm0 24467; SSE2-NEXT: psadbw %xmm1, %xmm0 24468; SSE2-NEXT: por {{.*}}(%rip), %xmm0 24469; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24470; SSE2-NEXT: movdqa %xmm1, %xmm2 24471; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 24472; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24473; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 24474; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24475; SSE2-NEXT: pand %xmm3, %xmm1 24476; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24477; SSE2-NEXT: por %xmm1, %xmm0 24478; SSE2-NEXT: retq 24479; 24480; SSE3-LABEL: ult_22_v2i64: 24481; SSE3: # %bb.0: 24482; SSE3-NEXT: movdqa %xmm0, %xmm1 24483; SSE3-NEXT: psrlw $1, %xmm1 24484; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24485; SSE3-NEXT: psubb %xmm1, %xmm0 24486; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24487; SSE3-NEXT: movdqa %xmm0, %xmm2 24488; SSE3-NEXT: pand %xmm1, %xmm2 24489; SSE3-NEXT: psrlw $2, %xmm0 24490; SSE3-NEXT: pand %xmm1, %xmm0 24491; SSE3-NEXT: paddb %xmm2, %xmm0 24492; SSE3-NEXT: movdqa %xmm0, %xmm1 24493; SSE3-NEXT: psrlw $4, 
%xmm1 24494; SSE3-NEXT: paddb %xmm0, %xmm1 24495; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24496; SSE3-NEXT: pxor %xmm0, %xmm0 24497; SSE3-NEXT: psadbw %xmm1, %xmm0 24498; SSE3-NEXT: por {{.*}}(%rip), %xmm0 24499; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24500; SSE3-NEXT: movdqa %xmm1, %xmm2 24501; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 24502; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24503; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 24504; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24505; SSE3-NEXT: pand %xmm3, %xmm1 24506; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24507; SSE3-NEXT: por %xmm1, %xmm0 24508; SSE3-NEXT: retq 24509; 24510; SSSE3-LABEL: ult_22_v2i64: 24511; SSSE3: # %bb.0: 24512; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24513; SSSE3-NEXT: movdqa %xmm0, %xmm2 24514; SSSE3-NEXT: pand %xmm1, %xmm2 24515; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24516; SSSE3-NEXT: movdqa %xmm3, %xmm4 24517; SSSE3-NEXT: pshufb %xmm2, %xmm4 24518; SSSE3-NEXT: psrlw $4, %xmm0 24519; SSSE3-NEXT: pand %xmm1, %xmm0 24520; SSSE3-NEXT: pshufb %xmm0, %xmm3 24521; SSSE3-NEXT: paddb %xmm4, %xmm3 24522; SSSE3-NEXT: pxor %xmm0, %xmm0 24523; SSSE3-NEXT: psadbw %xmm3, %xmm0 24524; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 24525; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24526; SSSE3-NEXT: movdqa %xmm1, %xmm2 24527; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 24528; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24529; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 24530; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24531; SSSE3-NEXT: pand %xmm3, %xmm1 24532; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24533; SSSE3-NEXT: por %xmm1, %xmm0 24534; SSSE3-NEXT: retq 24535; 24536; SSE41-LABEL: ult_22_v2i64: 24537; SSE41: # %bb.0: 24538; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24539; SSE41-NEXT: movdqa %xmm0, %xmm2 24540; SSE41-NEXT: pand %xmm1, %xmm2 24541; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24542; SSE41-NEXT: movdqa %xmm3, %xmm4 24543; SSE41-NEXT: pshufb %xmm2, %xmm4 24544; SSE41-NEXT: psrlw $4, %xmm0 24545; SSE41-NEXT: pand %xmm1, %xmm0 24546; SSE41-NEXT: pshufb %xmm0, %xmm3 24547; SSE41-NEXT: paddb %xmm4, %xmm3 24548; SSE41-NEXT: pxor %xmm0, %xmm0 24549; SSE41-NEXT: psadbw %xmm3, %xmm0 24550; SSE41-NEXT: por {{.*}}(%rip), %xmm0 24551; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24552; SSE41-NEXT: movdqa %xmm1, %xmm2 24553; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 24554; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24555; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 24556; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24557; SSE41-NEXT: pand %xmm3, %xmm1 24558; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24559; SSE41-NEXT: por %xmm1, %xmm0 24560; SSE41-NEXT: retq 24561; 24562; AVX1-LABEL: ult_22_v2i64: 24563; AVX1: # %bb.0: 24564; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24565; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 24566; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24567; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24568; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 24569; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 24570; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24571; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24572; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 24573; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24574; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] 24575; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24576; AVX1-NEXT: retq 24577; 24578; AVX2-LABEL: ult_22_v2i64: 24579; AVX2: # %bb.0: 24580; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24581; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 24582; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24583; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24584; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 24585; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 24586; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 24587; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24588; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 24589; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24590; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] 24591; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24592; AVX2-NEXT: retq 24593; 24594; AVX512VPOPCNTDQ-LABEL: ult_22_v2i64: 24595; AVX512VPOPCNTDQ: # %bb.0: 24596; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24597; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 24598; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] 24599; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24600; AVX512VPOPCNTDQ-NEXT: vzeroupper 24601; AVX512VPOPCNTDQ-NEXT: retq 24602; 24603; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64: 24604; AVX512VPOPCNTDQVL: # %bb.0: 24605; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 24606; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 24607; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24608; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24609; AVX512VPOPCNTDQVL-NEXT: retq 24610; 24611; BITALG_NOVLX-LABEL: ult_22_v2i64: 24612; BITALG_NOVLX: # %bb.0: 24613; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24614; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 24615; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 24616; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24617; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] 24618; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24619; BITALG_NOVLX-NEXT: vzeroupper 24620; BITALG_NOVLX-NEXT: retq 24621; 24622; BITALG-LABEL: ult_22_v2i64: 24623; BITALG: # %bb.0: 24624; BITALG-NEXT: vpopcntb %xmm0, %xmm0 24625; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 24626; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24627; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 24628; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24629; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24630; BITALG-NEXT: retq 24631 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 24632 %3 = icmp ult <2 x i64> %2, <i64 
22, i64 22> 24633 %4 = sext <2 x i1> %3 to <2 x i64> 24634 ret <2 x i64> %4 24635} 24636 24637define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { 24638; SSE2-LABEL: ugt_22_v2i64: 24639; SSE2: # %bb.0: 24640; SSE2-NEXT: movdqa %xmm0, %xmm1 24641; SSE2-NEXT: psrlw $1, %xmm1 24642; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24643; SSE2-NEXT: psubb %xmm1, %xmm0 24644; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24645; SSE2-NEXT: movdqa %xmm0, %xmm2 24646; SSE2-NEXT: pand %xmm1, %xmm2 24647; SSE2-NEXT: psrlw $2, %xmm0 24648; SSE2-NEXT: pand %xmm1, %xmm0 24649; SSE2-NEXT: paddb %xmm2, %xmm0 24650; SSE2-NEXT: movdqa %xmm0, %xmm1 24651; SSE2-NEXT: psrlw $4, %xmm1 24652; SSE2-NEXT: paddb %xmm0, %xmm1 24653; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24654; SSE2-NEXT: pxor %xmm0, %xmm0 24655; SSE2-NEXT: psadbw %xmm1, %xmm0 24656; SSE2-NEXT: por {{.*}}(%rip), %xmm0 24657; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24658; SSE2-NEXT: movdqa %xmm0, %xmm2 24659; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 24660; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24661; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 24662; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24663; SSE2-NEXT: pand %xmm3, %xmm1 24664; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24665; SSE2-NEXT: por %xmm1, %xmm0 24666; SSE2-NEXT: retq 24667; 24668; SSE3-LABEL: ugt_22_v2i64: 24669; SSE3: # %bb.0: 24670; SSE3-NEXT: movdqa %xmm0, %xmm1 24671; SSE3-NEXT: psrlw $1, %xmm1 24672; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24673; SSE3-NEXT: psubb %xmm1, %xmm0 24674; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24675; SSE3-NEXT: movdqa %xmm0, %xmm2 24676; SSE3-NEXT: pand %xmm1, %xmm2 24677; SSE3-NEXT: psrlw $2, %xmm0 24678; SSE3-NEXT: pand %xmm1, %xmm0 24679; SSE3-NEXT: paddb %xmm2, %xmm0 24680; SSE3-NEXT: movdqa %xmm0, %xmm1 24681; SSE3-NEXT: psrlw $4, %xmm1 24682; SSE3-NEXT: paddb %xmm0, %xmm1 24683; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24684; SSE3-NEXT: pxor %xmm0, 
%xmm0 24685; SSE3-NEXT: psadbw %xmm1, %xmm0 24686; SSE3-NEXT: por {{.*}}(%rip), %xmm0 24687; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24688; SSE3-NEXT: movdqa %xmm0, %xmm2 24689; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 24690; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24691; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 24692; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24693; SSE3-NEXT: pand %xmm3, %xmm1 24694; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24695; SSE3-NEXT: por %xmm1, %xmm0 24696; SSE3-NEXT: retq 24697; 24698; SSSE3-LABEL: ugt_22_v2i64: 24699; SSSE3: # %bb.0: 24700; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24701; SSSE3-NEXT: movdqa %xmm0, %xmm2 24702; SSSE3-NEXT: pand %xmm1, %xmm2 24703; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24704; SSSE3-NEXT: movdqa %xmm3, %xmm4 24705; SSSE3-NEXT: pshufb %xmm2, %xmm4 24706; SSSE3-NEXT: psrlw $4, %xmm0 24707; SSSE3-NEXT: pand %xmm1, %xmm0 24708; SSSE3-NEXT: pshufb %xmm0, %xmm3 24709; SSSE3-NEXT: paddb %xmm4, %xmm3 24710; SSSE3-NEXT: pxor %xmm0, %xmm0 24711; SSSE3-NEXT: psadbw %xmm3, %xmm0 24712; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 24713; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24714; SSSE3-NEXT: movdqa %xmm0, %xmm2 24715; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 24716; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24717; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 24718; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24719; SSSE3-NEXT: pand %xmm3, %xmm1 24720; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24721; SSSE3-NEXT: por %xmm1, %xmm0 24722; SSSE3-NEXT: retq 24723; 24724; SSE41-LABEL: ugt_22_v2i64: 24725; SSE41: # %bb.0: 24726; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24727; SSE41-NEXT: movdqa %xmm0, %xmm2 24728; SSE41-NEXT: pand %xmm1, %xmm2 24729; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24730; SSE41-NEXT: movdqa %xmm3, %xmm4 24731; SSE41-NEXT: 
pshufb %xmm2, %xmm4 24732; SSE41-NEXT: psrlw $4, %xmm0 24733; SSE41-NEXT: pand %xmm1, %xmm0 24734; SSE41-NEXT: pshufb %xmm0, %xmm3 24735; SSE41-NEXT: paddb %xmm4, %xmm3 24736; SSE41-NEXT: pxor %xmm0, %xmm0 24737; SSE41-NEXT: psadbw %xmm3, %xmm0 24738; SSE41-NEXT: por {{.*}}(%rip), %xmm0 24739; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] 24740; SSE41-NEXT: movdqa %xmm0, %xmm2 24741; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 24742; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24743; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 24744; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24745; SSE41-NEXT: pand %xmm3, %xmm1 24746; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24747; SSE41-NEXT: por %xmm1, %xmm0 24748; SSE41-NEXT: retq 24749; 24750; AVX1-LABEL: ugt_22_v2i64: 24751; AVX1: # %bb.0: 24752; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24753; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 24754; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24755; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24756; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 24757; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 24758; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24759; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24760; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 24761; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24762; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24763; AVX1-NEXT: retq 24764; 24765; AVX2-LABEL: ugt_22_v2i64: 24766; AVX2: # %bb.0: 24767; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24768; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 24769; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24770; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24771; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 24772; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 24773; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24774; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24775; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 24776; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24777; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24778; AVX2-NEXT: retq 24779; 24780; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64: 24781; AVX512VPOPCNTDQ: # %bb.0: 24782; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24783; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 24784; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24785; AVX512VPOPCNTDQ-NEXT: vzeroupper 24786; AVX512VPOPCNTDQ-NEXT: retq 24787; 24788; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64: 24789; AVX512VPOPCNTDQVL: # %bb.0: 24790; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 24791; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 24792; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24793; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24794; AVX512VPOPCNTDQVL-NEXT: retq 24795; 24796; BITALG_NOVLX-LABEL: ugt_22_v2i64: 24797; BITALG_NOVLX: # %bb.0: 24798; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24799; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 24800; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 24801; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24802; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 24803; BITALG_NOVLX-NEXT: vzeroupper 24804; BITALG_NOVLX-NEXT: retq 24805; 24806; BITALG-LABEL: ugt_22_v2i64: 24807; BITALG: # %bb.0: 24808; BITALG-NEXT: vpopcntb %xmm0, %xmm0 24809; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 24810; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24811; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 24812; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24813; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24814; BITALG-NEXT: retq 24815 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 24816 %3 = icmp ugt <2 x i64> %2, <i64 22, i64 22> 24817 %4 = sext <2 x i1> %3 to <2 x i64> 24818 ret <2 x i64> %4 24819} 24820 24821define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { 24822; SSE2-LABEL: ult_23_v2i64: 24823; SSE2: # %bb.0: 24824; SSE2-NEXT: movdqa %xmm0, %xmm1 24825; SSE2-NEXT: psrlw $1, %xmm1 24826; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 24827; SSE2-NEXT: psubb %xmm1, %xmm0 24828; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24829; SSE2-NEXT: movdqa %xmm0, %xmm2 24830; SSE2-NEXT: pand %xmm1, %xmm2 24831; SSE2-NEXT: psrlw $2, %xmm0 24832; SSE2-NEXT: pand %xmm1, %xmm0 24833; SSE2-NEXT: paddb %xmm2, %xmm0 24834; SSE2-NEXT: movdqa %xmm0, %xmm1 24835; SSE2-NEXT: psrlw $4, %xmm1 24836; SSE2-NEXT: paddb %xmm0, %xmm1 24837; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 24838; SSE2-NEXT: pxor %xmm0, %xmm0 24839; SSE2-NEXT: psadbw %xmm1, %xmm0 24840; SSE2-NEXT: por {{.*}}(%rip), %xmm0 24841; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 24842; SSE2-NEXT: movdqa %xmm1, %xmm2 24843; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 24844; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24845; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 24846; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24847; SSE2-NEXT: pand %xmm3, %xmm1 24848; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24849; SSE2-NEXT: por %xmm1, %xmm0 24850; SSE2-NEXT: retq 24851; 24852; SSE3-LABEL: ult_23_v2i64: 24853; SSE3: # %bb.0: 24854; SSE3-NEXT: movdqa %xmm0, %xmm1 24855; SSE3-NEXT: psrlw $1, %xmm1 24856; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24857; SSE3-NEXT: psubb %xmm1, %xmm0 24858; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 24859; SSE3-NEXT: movdqa %xmm0, %xmm2 24860; SSE3-NEXT: pand %xmm1, %xmm2 24861; SSE3-NEXT: psrlw $2, %xmm0 24862; SSE3-NEXT: pand %xmm1, %xmm0 24863; SSE3-NEXT: paddb %xmm2, %xmm0 24864; SSE3-NEXT: movdqa %xmm0, %xmm1 24865; SSE3-NEXT: psrlw $4, %xmm1 24866; SSE3-NEXT: paddb %xmm0, %xmm1 24867; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 24868; SSE3-NEXT: pxor %xmm0, %xmm0 24869; SSE3-NEXT: psadbw %xmm1, %xmm0 24870; SSE3-NEXT: por {{.*}}(%rip), %xmm0 24871; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 24872; SSE3-NEXT: movdqa %xmm1, %xmm2 24873; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 24874; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24875; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 24876; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24877; SSE3-NEXT: pand %xmm3, %xmm1 24878; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24879; SSE3-NEXT: por %xmm1, %xmm0 24880; SSE3-NEXT: retq 24881; 24882; SSSE3-LABEL: ult_23_v2i64: 24883; SSSE3: # %bb.0: 24884; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24885; SSSE3-NEXT: movdqa %xmm0, %xmm2 24886; SSSE3-NEXT: pand %xmm1, %xmm2 24887; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24888; SSSE3-NEXT: movdqa %xmm3, %xmm4 24889; SSSE3-NEXT: pshufb %xmm2, %xmm4 24890; SSSE3-NEXT: psrlw $4, %xmm0 24891; SSSE3-NEXT: pand %xmm1, %xmm0 24892; SSSE3-NEXT: pshufb %xmm0, %xmm3 24893; SSSE3-NEXT: paddb %xmm4, %xmm3 24894; SSSE3-NEXT: pxor %xmm0, %xmm0 24895; SSSE3-NEXT: psadbw %xmm3, %xmm0 24896; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 24897; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 24898; SSSE3-NEXT: movdqa %xmm1, %xmm2 24899; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 24900; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24901; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 24902; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24903; SSSE3-NEXT: pand %xmm3, %xmm1 24904; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24905; SSSE3-NEXT: por %xmm1, %xmm0 24906; SSSE3-NEXT: retq 24907; 24908; SSE41-LABEL: ult_23_v2i64: 24909; SSE41: # %bb.0: 24910; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24911; SSE41-NEXT: movdqa %xmm0, %xmm2 24912; SSE41-NEXT: pand %xmm1, %xmm2 24913; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24914; SSE41-NEXT: movdqa %xmm3, %xmm4 24915; SSE41-NEXT: pshufb %xmm2, %xmm4 24916; SSE41-NEXT: psrlw $4, %xmm0 24917; SSE41-NEXT: pand %xmm1, %xmm0 24918; SSE41-NEXT: pshufb %xmm0, %xmm3 24919; SSE41-NEXT: paddb %xmm4, %xmm3 24920; SSE41-NEXT: pxor %xmm0, %xmm0 24921; SSE41-NEXT: psadbw %xmm3, %xmm0 24922; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
24923; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 24924; SSE41-NEXT: movdqa %xmm1, %xmm2 24925; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 24926; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 24927; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 24928; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 24929; SSE41-NEXT: pand %xmm3, %xmm1 24930; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 24931; SSE41-NEXT: por %xmm1, %xmm0 24932; SSE41-NEXT: retq 24933; 24934; AVX1-LABEL: ult_23_v2i64: 24935; AVX1: # %bb.0: 24936; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24937; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 24938; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24939; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24940; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 24941; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 24942; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24943; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24944; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 24945; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24946; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] 24947; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24948; AVX1-NEXT: retq 24949; 24950; AVX2-LABEL: ult_23_v2i64: 24951; AVX2: # %bb.0: 24952; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 24953; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 24954; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 24955; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 24956; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 24957; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 24958; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 24959; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 24960; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 24961; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24962; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] 24963; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24964; AVX2-NEXT: retq 24965; 24966; AVX512VPOPCNTDQ-LABEL: ult_23_v2i64: 24967; AVX512VPOPCNTDQ: # %bb.0: 24968; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 24969; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 24970; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] 24971; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24972; AVX512VPOPCNTDQ-NEXT: vzeroupper 24973; AVX512VPOPCNTDQ-NEXT: retq 24974; 24975; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64: 24976; AVX512VPOPCNTDQVL: # %bb.0: 24977; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 24978; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 24979; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 24980; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 24981; AVX512VPOPCNTDQVL-NEXT: retq 24982; 24983; BITALG_NOVLX-LABEL: ult_23_v2i64: 24984; BITALG_NOVLX: # %bb.0: 24985; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 24986; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 24987; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 24988; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24989; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] 24990; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 24991; BITALG_NOVLX-NEXT: vzeroupper 24992; BITALG_NOVLX-NEXT: retq 24993; 24994; BITALG-LABEL: ult_23_v2i64: 24995; BITALG: # %bb.0: 24996; BITALG-NEXT: vpopcntb %xmm0, %xmm0 24997; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 24998; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 24999; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 25000; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25001; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25002; BITALG-NEXT: retq 25003 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 25004 %3 = icmp ult <2 x i64> %2, <i64 23, i64 23> 25005 %4 = sext <2 x i1> %3 to <2 x i64> 25006 ret <2 x i64> %4 25007} 25008 25009define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { 25010; SSE2-LABEL: ugt_23_v2i64: 25011; SSE2: # %bb.0: 25012; SSE2-NEXT: movdqa %xmm0, %xmm1 25013; SSE2-NEXT: psrlw $1, %xmm1 25014; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25015; SSE2-NEXT: psubb %xmm1, %xmm0 25016; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25017; SSE2-NEXT: movdqa %xmm0, %xmm2 25018; SSE2-NEXT: pand %xmm1, %xmm2 25019; SSE2-NEXT: psrlw $2, %xmm0 25020; SSE2-NEXT: pand %xmm1, %xmm0 25021; SSE2-NEXT: paddb %xmm2, %xmm0 25022; SSE2-NEXT: movdqa %xmm0, %xmm1 25023; SSE2-NEXT: psrlw $4, %xmm1 25024; SSE2-NEXT: paddb %xmm0, %xmm1 25025; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25026; SSE2-NEXT: pxor %xmm0, %xmm0 25027; SSE2-NEXT: psadbw %xmm1, %xmm0 25028; SSE2-NEXT: por {{.*}}(%rip), %xmm0 25029; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 25030; SSE2-NEXT: movdqa %xmm0, %xmm2 25031; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 25032; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25033; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 25034; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25035; SSE2-NEXT: pand %xmm3, %xmm1 25036; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25037; SSE2-NEXT: por %xmm1, %xmm0 25038; SSE2-NEXT: retq 25039; 25040; SSE3-LABEL: ugt_23_v2i64: 25041; SSE3: # %bb.0: 25042; SSE3-NEXT: movdqa %xmm0, %xmm1 25043; SSE3-NEXT: psrlw $1, %xmm1 25044; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25045; SSE3-NEXT: psubb %xmm1, %xmm0 25046; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25047; SSE3-NEXT: movdqa %xmm0, %xmm2 25048; SSE3-NEXT: pand %xmm1, %xmm2 25049; SSE3-NEXT: psrlw $2, %xmm0 25050; SSE3-NEXT: pand %xmm1, %xmm0 25051; SSE3-NEXT: paddb %xmm2, %xmm0 25052; SSE3-NEXT: movdqa %xmm0, %xmm1 25053; SSE3-NEXT: psrlw $4, %xmm1 25054; SSE3-NEXT: paddb %xmm0, %xmm1 25055; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25056; SSE3-NEXT: pxor %xmm0, %xmm0 25057; SSE3-NEXT: psadbw %xmm1, %xmm0 25058; SSE3-NEXT: por {{.*}}(%rip), %xmm0 25059; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 25060; SSE3-NEXT: movdqa %xmm0, %xmm2 25061; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 25062; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25063; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 25064; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25065; 
SSE3-NEXT: pand %xmm3, %xmm1 25066; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25067; SSE3-NEXT: por %xmm1, %xmm0 25068; SSE3-NEXT: retq 25069; 25070; SSSE3-LABEL: ugt_23_v2i64: 25071; SSSE3: # %bb.0: 25072; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25073; SSSE3-NEXT: movdqa %xmm0, %xmm2 25074; SSSE3-NEXT: pand %xmm1, %xmm2 25075; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25076; SSSE3-NEXT: movdqa %xmm3, %xmm4 25077; SSSE3-NEXT: pshufb %xmm2, %xmm4 25078; SSSE3-NEXT: psrlw $4, %xmm0 25079; SSSE3-NEXT: pand %xmm1, %xmm0 25080; SSSE3-NEXT: pshufb %xmm0, %xmm3 25081; SSSE3-NEXT: paddb %xmm4, %xmm3 25082; SSSE3-NEXT: pxor %xmm0, %xmm0 25083; SSSE3-NEXT: psadbw %xmm3, %xmm0 25084; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 25085; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 25086; SSSE3-NEXT: movdqa %xmm0, %xmm2 25087; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 25088; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25089; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 25090; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25091; SSSE3-NEXT: pand %xmm3, %xmm1 25092; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25093; SSSE3-NEXT: por %xmm1, %xmm0 25094; SSSE3-NEXT: retq 25095; 25096; SSE41-LABEL: ugt_23_v2i64: 25097; SSE41: # %bb.0: 25098; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25099; SSE41-NEXT: movdqa %xmm0, %xmm2 25100; SSE41-NEXT: pand %xmm1, %xmm2 25101; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25102; SSE41-NEXT: movdqa %xmm3, %xmm4 25103; SSE41-NEXT: pshufb %xmm2, %xmm4 25104; SSE41-NEXT: psrlw $4, %xmm0 25105; SSE41-NEXT: pand %xmm1, %xmm0 25106; SSE41-NEXT: pshufb %xmm0, %xmm3 25107; SSE41-NEXT: paddb %xmm4, %xmm3 25108; SSE41-NEXT: pxor %xmm0, %xmm0 25109; SSE41-NEXT: psadbw %xmm3, %xmm0 25110; SSE41-NEXT: por {{.*}}(%rip), %xmm0 25111; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] 25112; SSE41-NEXT: movdqa %xmm0, 
%xmm2 25113; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 25114; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25115; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 25116; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25117; SSE41-NEXT: pand %xmm3, %xmm1 25118; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25119; SSE41-NEXT: por %xmm1, %xmm0 25120; SSE41-NEXT: retq 25121; 25122; AVX1-LABEL: ugt_23_v2i64: 25123; AVX1: # %bb.0: 25124; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25125; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 25126; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25127; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25128; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 25129; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 25130; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25131; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25132; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 25133; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25134; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25135; AVX1-NEXT: retq 25136; 25137; AVX2-LABEL: ugt_23_v2i64: 25138; AVX2: # %bb.0: 25139; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25140; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 25141; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25142; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25143; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 25144; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 25145; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25146; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25147; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 25148; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25149; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25150; AVX2-NEXT: retq 25151; 25152; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64: 25153; AVX512VPOPCNTDQ: # %bb.0: 25154; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25155; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 25156; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25157; AVX512VPOPCNTDQ-NEXT: vzeroupper 25158; 
AVX512VPOPCNTDQ-NEXT: retq 25159; 25160; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64: 25161; AVX512VPOPCNTDQVL: # %bb.0: 25162; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 25163; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 25164; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25165; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25166; AVX512VPOPCNTDQVL-NEXT: retq 25167; 25168; BITALG_NOVLX-LABEL: ugt_23_v2i64: 25169; BITALG_NOVLX: # %bb.0: 25170; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25171; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 25172; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 25173; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25174; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25175; BITALG_NOVLX-NEXT: vzeroupper 25176; BITALG_NOVLX-NEXT: retq 25177; 25178; BITALG-LABEL: ugt_23_v2i64: 25179; BITALG: # %bb.0: 25180; BITALG-NEXT: vpopcntb %xmm0, %xmm0 25181; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 25182; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25183; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 25184; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25185; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25186; BITALG-NEXT: retq 25187 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 25188 %3 = icmp ugt <2 x i64> %2, <i64 23, i64 23> 25189 %4 = sext <2 x i1> %3 to <2 x i64> 25190 ret <2 x i64> %4 25191} 25192 25193define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { 25194; SSE2-LABEL: ult_24_v2i64: 25195; SSE2: # %bb.0: 25196; SSE2-NEXT: movdqa %xmm0, %xmm1 25197; SSE2-NEXT: psrlw $1, %xmm1 25198; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25199; SSE2-NEXT: psubb %xmm1, %xmm0 25200; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25201; SSE2-NEXT: movdqa %xmm0, %xmm2 25202; SSE2-NEXT: pand %xmm1, %xmm2 25203; SSE2-NEXT: psrlw $2, %xmm0 25204; SSE2-NEXT: pand %xmm1, %xmm0 25205; SSE2-NEXT: paddb %xmm2, %xmm0 25206; SSE2-NEXT: movdqa %xmm0, %xmm1 25207; SSE2-NEXT: psrlw $4, 
%xmm1 25208; SSE2-NEXT: paddb %xmm0, %xmm1 25209; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25210; SSE2-NEXT: pxor %xmm0, %xmm0 25211; SSE2-NEXT: psadbw %xmm1, %xmm0 25212; SSE2-NEXT: por {{.*}}(%rip), %xmm0 25213; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25214; SSE2-NEXT: movdqa %xmm1, %xmm2 25215; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 25216; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25217; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 25218; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25219; SSE2-NEXT: pand %xmm3, %xmm1 25220; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25221; SSE2-NEXT: por %xmm1, %xmm0 25222; SSE2-NEXT: retq 25223; 25224; SSE3-LABEL: ult_24_v2i64: 25225; SSE3: # %bb.0: 25226; SSE3-NEXT: movdqa %xmm0, %xmm1 25227; SSE3-NEXT: psrlw $1, %xmm1 25228; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25229; SSE3-NEXT: psubb %xmm1, %xmm0 25230; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25231; SSE3-NEXT: movdqa %xmm0, %xmm2 25232; SSE3-NEXT: pand %xmm1, %xmm2 25233; SSE3-NEXT: psrlw $2, %xmm0 25234; SSE3-NEXT: pand %xmm1, %xmm0 25235; SSE3-NEXT: paddb %xmm2, %xmm0 25236; SSE3-NEXT: movdqa %xmm0, %xmm1 25237; SSE3-NEXT: psrlw $4, %xmm1 25238; SSE3-NEXT: paddb %xmm0, %xmm1 25239; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25240; SSE3-NEXT: pxor %xmm0, %xmm0 25241; SSE3-NEXT: psadbw %xmm1, %xmm0 25242; SSE3-NEXT: por {{.*}}(%rip), %xmm0 25243; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25244; SSE3-NEXT: movdqa %xmm1, %xmm2 25245; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 25246; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25247; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 25248; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25249; SSE3-NEXT: pand %xmm3, %xmm1 25250; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25251; SSE3-NEXT: por %xmm1, %xmm0 25252; SSE3-NEXT: retq 25253; 25254; SSSE3-LABEL: ult_24_v2i64: 25255; SSSE3: # %bb.0: 25256; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25257; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 25258; SSSE3-NEXT: pand %xmm1, %xmm2 25259; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25260; SSSE3-NEXT: movdqa %xmm3, %xmm4 25261; SSSE3-NEXT: pshufb %xmm2, %xmm4 25262; SSSE3-NEXT: psrlw $4, %xmm0 25263; SSSE3-NEXT: pand %xmm1, %xmm0 25264; SSSE3-NEXT: pshufb %xmm0, %xmm3 25265; SSSE3-NEXT: paddb %xmm4, %xmm3 25266; SSSE3-NEXT: pxor %xmm0, %xmm0 25267; SSSE3-NEXT: psadbw %xmm3, %xmm0 25268; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 25269; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25270; SSSE3-NEXT: movdqa %xmm1, %xmm2 25271; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 25272; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25273; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 25274; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25275; SSSE3-NEXT: pand %xmm3, %xmm1 25276; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25277; SSSE3-NEXT: por %xmm1, %xmm0 25278; SSSE3-NEXT: retq 25279; 25280; SSE41-LABEL: ult_24_v2i64: 25281; SSE41: # %bb.0: 25282; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25283; SSE41-NEXT: movdqa %xmm0, %xmm2 25284; SSE41-NEXT: pand %xmm1, %xmm2 25285; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25286; SSE41-NEXT: movdqa %xmm3, %xmm4 25287; SSE41-NEXT: pshufb %xmm2, %xmm4 25288; SSE41-NEXT: psrlw $4, %xmm0 25289; SSE41-NEXT: pand %xmm1, %xmm0 25290; SSE41-NEXT: pshufb %xmm0, %xmm3 25291; SSE41-NEXT: paddb %xmm4, %xmm3 25292; SSE41-NEXT: pxor %xmm0, %xmm0 25293; SSE41-NEXT: psadbw %xmm3, %xmm0 25294; SSE41-NEXT: por {{.*}}(%rip), %xmm0 25295; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25296; SSE41-NEXT: movdqa %xmm1, %xmm2 25297; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 25298; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25299; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 25300; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25301; SSE41-NEXT: pand %xmm3, %xmm1 25302; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25303; 
SSE41-NEXT: por %xmm1, %xmm0 25304; SSE41-NEXT: retq 25305; 25306; AVX1-LABEL: ult_24_v2i64: 25307; AVX1: # %bb.0: 25308; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25309; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 25310; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25311; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25312; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 25313; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 25314; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25315; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25316; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 25317; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25318; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] 25319; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25320; AVX1-NEXT: retq 25321; 25322; AVX2-LABEL: ult_24_v2i64: 25323; AVX2: # %bb.0: 25324; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25325; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 25326; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25327; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25328; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 25329; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 25330; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25331; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25332; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 25333; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25334; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] 25335; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25336; AVX2-NEXT: retq 25337; 25338; AVX512VPOPCNTDQ-LABEL: ult_24_v2i64: 25339; AVX512VPOPCNTDQ: # %bb.0: 25340; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25341; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 25342; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] 25343; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25344; AVX512VPOPCNTDQ-NEXT: vzeroupper 25345; AVX512VPOPCNTDQ-NEXT: retq 25346; 25347; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64: 25348; AVX512VPOPCNTDQVL: # %bb.0: 25349; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 25350; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 25351; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25352; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25353; AVX512VPOPCNTDQVL-NEXT: retq 25354; 25355; BITALG_NOVLX-LABEL: ult_24_v2i64: 25356; BITALG_NOVLX: # %bb.0: 25357; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25358; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 25359; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 25360; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25361; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] 25362; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25363; BITALG_NOVLX-NEXT: vzeroupper 25364; BITALG_NOVLX-NEXT: retq 25365; 25366; BITALG-LABEL: ult_24_v2i64: 25367; BITALG: # %bb.0: 25368; BITALG-NEXT: vpopcntb %xmm0, %xmm0 25369; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 25370; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25371; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 25372; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25373; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25374; BITALG-NEXT: retq 25375 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 25376 %3 = icmp ult <2 x i64> %2, <i64 24, i64 24> 25377 %4 = sext <2 x i1> %3 to <2 x i64> 25378 ret <2 x i64> %4 25379} 25380 25381define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { 25382; SSE2-LABEL: ugt_24_v2i64: 25383; SSE2: # %bb.0: 25384; SSE2-NEXT: movdqa %xmm0, %xmm1 25385; SSE2-NEXT: psrlw $1, %xmm1 25386; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25387; SSE2-NEXT: psubb %xmm1, %xmm0 25388; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25389; SSE2-NEXT: movdqa %xmm0, %xmm2 25390; SSE2-NEXT: pand %xmm1, %xmm2 25391; SSE2-NEXT: psrlw $2, %xmm0 25392; SSE2-NEXT: pand %xmm1, %xmm0 25393; SSE2-NEXT: paddb %xmm2, %xmm0 25394; SSE2-NEXT: movdqa %xmm0, %xmm1 25395; SSE2-NEXT: psrlw $4, %xmm1 25396; SSE2-NEXT: paddb %xmm0, %xmm1 25397; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25398; SSE2-NEXT: pxor %xmm0, 
%xmm0 25399; SSE2-NEXT: psadbw %xmm1, %xmm0 25400; SSE2-NEXT: por {{.*}}(%rip), %xmm0 25401; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25402; SSE2-NEXT: movdqa %xmm0, %xmm2 25403; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 25404; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25405; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 25406; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25407; SSE2-NEXT: pand %xmm3, %xmm1 25408; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25409; SSE2-NEXT: por %xmm1, %xmm0 25410; SSE2-NEXT: retq 25411; 25412; SSE3-LABEL: ugt_24_v2i64: 25413; SSE3: # %bb.0: 25414; SSE3-NEXT: movdqa %xmm0, %xmm1 25415; SSE3-NEXT: psrlw $1, %xmm1 25416; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25417; SSE3-NEXT: psubb %xmm1, %xmm0 25418; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25419; SSE3-NEXT: movdqa %xmm0, %xmm2 25420; SSE3-NEXT: pand %xmm1, %xmm2 25421; SSE3-NEXT: psrlw $2, %xmm0 25422; SSE3-NEXT: pand %xmm1, %xmm0 25423; SSE3-NEXT: paddb %xmm2, %xmm0 25424; SSE3-NEXT: movdqa %xmm0, %xmm1 25425; SSE3-NEXT: psrlw $4, %xmm1 25426; SSE3-NEXT: paddb %xmm0, %xmm1 25427; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25428; SSE3-NEXT: pxor %xmm0, %xmm0 25429; SSE3-NEXT: psadbw %xmm1, %xmm0 25430; SSE3-NEXT: por {{.*}}(%rip), %xmm0 25431; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25432; SSE3-NEXT: movdqa %xmm0, %xmm2 25433; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 25434; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25435; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 25436; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25437; SSE3-NEXT: pand %xmm3, %xmm1 25438; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25439; SSE3-NEXT: por %xmm1, %xmm0 25440; SSE3-NEXT: retq 25441; 25442; SSSE3-LABEL: ugt_24_v2i64: 25443; SSSE3: # %bb.0: 25444; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25445; SSSE3-NEXT: movdqa %xmm0, %xmm2 25446; SSSE3-NEXT: pand %xmm1, %xmm2 25447; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25448; SSSE3-NEXT: movdqa %xmm3, %xmm4 25449; SSSE3-NEXT: pshufb %xmm2, %xmm4 25450; SSSE3-NEXT: psrlw $4, %xmm0 25451; SSSE3-NEXT: pand %xmm1, %xmm0 25452; SSSE3-NEXT: pshufb %xmm0, %xmm3 25453; SSSE3-NEXT: paddb %xmm4, %xmm3 25454; SSSE3-NEXT: pxor %xmm0, %xmm0 25455; SSSE3-NEXT: psadbw %xmm3, %xmm0 25456; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 25457; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25458; SSSE3-NEXT: movdqa %xmm0, %xmm2 25459; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 25460; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25461; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 25462; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25463; SSSE3-NEXT: pand %xmm3, %xmm1 25464; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25465; SSSE3-NEXT: por %xmm1, %xmm0 25466; SSSE3-NEXT: retq 25467; 25468; SSE41-LABEL: ugt_24_v2i64: 25469; SSE41: # %bb.0: 25470; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25471; SSE41-NEXT: movdqa %xmm0, %xmm2 25472; SSE41-NEXT: pand %xmm1, %xmm2 25473; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25474; SSE41-NEXT: movdqa %xmm3, %xmm4 25475; SSE41-NEXT: pshufb %xmm2, %xmm4 25476; SSE41-NEXT: psrlw $4, %xmm0 25477; SSE41-NEXT: pand %xmm1, %xmm0 25478; SSE41-NEXT: pshufb %xmm0, %xmm3 25479; SSE41-NEXT: paddb %xmm4, %xmm3 25480; SSE41-NEXT: pxor %xmm0, %xmm0 25481; SSE41-NEXT: psadbw %xmm3, %xmm0 25482; SSE41-NEXT: por {{.*}}(%rip), %xmm0 25483; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] 25484; SSE41-NEXT: movdqa %xmm0, %xmm2 25485; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 25486; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25487; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 25488; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25489; SSE41-NEXT: pand %xmm3, %xmm1 25490; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25491; SSE41-NEXT: por %xmm1, %xmm0 25492; SSE41-NEXT: retq 25493; 25494; AVX1-LABEL: ugt_24_v2i64: 25495; AVX1: # %bb.0: 
25496; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25497; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 25498; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25499; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25500; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 25501; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 25502; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25503; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25504; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 25505; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25506; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25507; AVX1-NEXT: retq 25508; 25509; AVX2-LABEL: ugt_24_v2i64: 25510; AVX2: # %bb.0: 25511; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25512; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 25513; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25514; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25515; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 25516; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 25517; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25518; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25519; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 25520; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25521; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25522; AVX2-NEXT: retq 25523; 25524; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64: 25525; AVX512VPOPCNTDQ: # %bb.0: 25526; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25527; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 25528; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25529; AVX512VPOPCNTDQ-NEXT: vzeroupper 25530; AVX512VPOPCNTDQ-NEXT: retq 25531; 25532; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64: 25533; AVX512VPOPCNTDQVL: # %bb.0: 25534; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 25535; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 25536; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25537; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25538; AVX512VPOPCNTDQVL-NEXT: retq 25539; 25540; 
BITALG_NOVLX-LABEL: ugt_24_v2i64: 25541; BITALG_NOVLX: # %bb.0: 25542; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25543; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 25544; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 25545; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25546; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25547; BITALG_NOVLX-NEXT: vzeroupper 25548; BITALG_NOVLX-NEXT: retq 25549; 25550; BITALG-LABEL: ugt_24_v2i64: 25551; BITALG: # %bb.0: 25552; BITALG-NEXT: vpopcntb %xmm0, %xmm0 25553; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 25554; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25555; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 25556; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25557; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25558; BITALG-NEXT: retq 25559 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 25560 %3 = icmp ugt <2 x i64> %2, <i64 24, i64 24> 25561 %4 = sext <2 x i1> %3 to <2 x i64> 25562 ret <2 x i64> %4 25563} 25564 25565define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { 25566; SSE2-LABEL: ult_25_v2i64: 25567; SSE2: # %bb.0: 25568; SSE2-NEXT: movdqa %xmm0, %xmm1 25569; SSE2-NEXT: psrlw $1, %xmm1 25570; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25571; SSE2-NEXT: psubb %xmm1, %xmm0 25572; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25573; SSE2-NEXT: movdqa %xmm0, %xmm2 25574; SSE2-NEXT: pand %xmm1, %xmm2 25575; SSE2-NEXT: psrlw $2, %xmm0 25576; SSE2-NEXT: pand %xmm1, %xmm0 25577; SSE2-NEXT: paddb %xmm2, %xmm0 25578; SSE2-NEXT: movdqa %xmm0, %xmm1 25579; SSE2-NEXT: psrlw $4, %xmm1 25580; SSE2-NEXT: paddb %xmm0, %xmm1 25581; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25582; SSE2-NEXT: pxor %xmm0, %xmm0 25583; SSE2-NEXT: psadbw %xmm1, %xmm0 25584; SSE2-NEXT: por {{.*}}(%rip), %xmm0 25585; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] 25586; SSE2-NEXT: movdqa %xmm1, %xmm2 25587; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 25588; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25589; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 25590; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25591; SSE2-NEXT: pand %xmm3, %xmm1 25592; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25593; SSE2-NEXT: por %xmm1, %xmm0 25594; SSE2-NEXT: retq 25595; 25596; SSE3-LABEL: ult_25_v2i64: 25597; SSE3: # %bb.0: 25598; SSE3-NEXT: movdqa %xmm0, %xmm1 25599; SSE3-NEXT: psrlw $1, %xmm1 25600; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25601; SSE3-NEXT: psubb %xmm1, %xmm0 25602; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25603; SSE3-NEXT: movdqa %xmm0, %xmm2 25604; SSE3-NEXT: pand %xmm1, %xmm2 25605; SSE3-NEXT: psrlw $2, %xmm0 25606; SSE3-NEXT: pand %xmm1, %xmm0 25607; SSE3-NEXT: paddb %xmm2, %xmm0 25608; SSE3-NEXT: movdqa %xmm0, %xmm1 25609; SSE3-NEXT: psrlw $4, %xmm1 25610; SSE3-NEXT: paddb %xmm0, %xmm1 25611; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25612; SSE3-NEXT: pxor %xmm0, %xmm0 25613; SSE3-NEXT: psadbw %xmm1, %xmm0 25614; SSE3-NEXT: por {{.*}}(%rip), %xmm0 25615; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] 25616; SSE3-NEXT: movdqa %xmm1, %xmm2 25617; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 25618; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25619; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 25620; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25621; SSE3-NEXT: pand %xmm3, %xmm1 25622; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25623; SSE3-NEXT: por %xmm1, %xmm0 25624; SSE3-NEXT: retq 25625; 25626; SSSE3-LABEL: ult_25_v2i64: 25627; SSSE3: # %bb.0: 25628; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25629; SSSE3-NEXT: movdqa %xmm0, %xmm2 25630; SSSE3-NEXT: pand %xmm1, %xmm2 25631; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25632; SSSE3-NEXT: movdqa %xmm3, %xmm4 25633; SSSE3-NEXT: pshufb %xmm2, %xmm4 25634; SSSE3-NEXT: psrlw $4, %xmm0 25635; SSSE3-NEXT: pand %xmm1, %xmm0 25636; SSSE3-NEXT: pshufb %xmm0, %xmm3 25637; SSSE3-NEXT: paddb %xmm4, %xmm3 25638; SSSE3-NEXT: pxor %xmm0, 
%xmm0 25639; SSSE3-NEXT: psadbw %xmm3, %xmm0 25640; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 25641; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] 25642; SSSE3-NEXT: movdqa %xmm1, %xmm2 25643; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 25644; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25645; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 25646; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25647; SSSE3-NEXT: pand %xmm3, %xmm1 25648; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25649; SSSE3-NEXT: por %xmm1, %xmm0 25650; SSSE3-NEXT: retq 25651; 25652; SSE41-LABEL: ult_25_v2i64: 25653; SSE41: # %bb.0: 25654; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25655; SSE41-NEXT: movdqa %xmm0, %xmm2 25656; SSE41-NEXT: pand %xmm1, %xmm2 25657; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25658; SSE41-NEXT: movdqa %xmm3, %xmm4 25659; SSE41-NEXT: pshufb %xmm2, %xmm4 25660; SSE41-NEXT: psrlw $4, %xmm0 25661; SSE41-NEXT: pand %xmm1, %xmm0 25662; SSE41-NEXT: pshufb %xmm0, %xmm3 25663; SSE41-NEXT: paddb %xmm4, %xmm3 25664; SSE41-NEXT: pxor %xmm0, %xmm0 25665; SSE41-NEXT: psadbw %xmm3, %xmm0 25666; SSE41-NEXT: por {{.*}}(%rip), %xmm0 25667; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] 25668; SSE41-NEXT: movdqa %xmm1, %xmm2 25669; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 25670; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25671; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 25672; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25673; SSE41-NEXT: pand %xmm3, %xmm1 25674; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25675; SSE41-NEXT: por %xmm1, %xmm0 25676; SSE41-NEXT: retq 25677; 25678; AVX1-LABEL: ult_25_v2i64: 25679; AVX1: # %bb.0: 25680; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25681; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 25682; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25683; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25684; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 25685; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 25686; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25687; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25688; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 25689; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25690; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] 25691; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25692; AVX1-NEXT: retq 25693; 25694; AVX2-LABEL: ult_25_v2i64: 25695; AVX2: # %bb.0: 25696; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25697; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 25698; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25699; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25700; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 25701; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 25702; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25703; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25704; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 25705; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25706; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] 25707; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25708; AVX2-NEXT: retq 25709; 25710; AVX512VPOPCNTDQ-LABEL: ult_25_v2i64: 25711; AVX512VPOPCNTDQ: # %bb.0: 25712; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25713; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 25714; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] 25715; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25716; AVX512VPOPCNTDQ-NEXT: vzeroupper 25717; AVX512VPOPCNTDQ-NEXT: retq 25718; 25719; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64: 25720; AVX512VPOPCNTDQVL: # %bb.0: 25721; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 25722; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 25723; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25724; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25725; AVX512VPOPCNTDQVL-NEXT: retq 25726; 25727; BITALG_NOVLX-LABEL: ult_25_v2i64: 25728; BITALG_NOVLX: # %bb.0: 25729; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25730; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 25731; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 25732; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25733; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] 25734; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 25735; BITALG_NOVLX-NEXT: vzeroupper 25736; BITALG_NOVLX-NEXT: retq 25737; 25738; BITALG-LABEL: ult_25_v2i64: 25739; BITALG: # %bb.0: 25740; BITALG-NEXT: vpopcntb %xmm0, %xmm0 25741; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 25742; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25743; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 25744; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25745; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25746; BITALG-NEXT: retq 25747 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 25748 %3 = icmp ult <2 x i64> %2, <i64 25, i64 25> 25749 %4 = sext <2 x i1> %3 to <2 x i64> 25750 ret <2 x i64> %4 25751} 25752 25753define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { 25754; SSE2-LABEL: ugt_25_v2i64: 25755; SSE2: # %bb.0: 25756; SSE2-NEXT: movdqa %xmm0, %xmm1 25757; SSE2-NEXT: psrlw $1, %xmm1 25758; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25759; SSE2-NEXT: psubb %xmm1, %xmm0 25760; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25761; SSE2-NEXT: movdqa %xmm0, %xmm2 25762; SSE2-NEXT: pand %xmm1, %xmm2 25763; SSE2-NEXT: psrlw $2, %xmm0 25764; SSE2-NEXT: pand %xmm1, %xmm0 25765; SSE2-NEXT: paddb %xmm2, %xmm0 25766; SSE2-NEXT: movdqa %xmm0, %xmm1 25767; SSE2-NEXT: psrlw $4, %xmm1 25768; SSE2-NEXT: paddb %xmm0, %xmm1 25769; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25770; SSE2-NEXT: pxor %xmm0, %xmm0 25771; SSE2-NEXT: psadbw %xmm1, %xmm0 25772; SSE2-NEXT: por {{.*}}(%rip), %xmm0 25773; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] 25774; SSE2-NEXT: movdqa %xmm0, %xmm2 25775; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 25776; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25777; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 25778; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25779; SSE2-NEXT: pand 
%xmm3, %xmm1 25780; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25781; SSE2-NEXT: por %xmm1, %xmm0 25782; SSE2-NEXT: retq 25783; 25784; SSE3-LABEL: ugt_25_v2i64: 25785; SSE3: # %bb.0: 25786; SSE3-NEXT: movdqa %xmm0, %xmm1 25787; SSE3-NEXT: psrlw $1, %xmm1 25788; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25789; SSE3-NEXT: psubb %xmm1, %xmm0 25790; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25791; SSE3-NEXT: movdqa %xmm0, %xmm2 25792; SSE3-NEXT: pand %xmm1, %xmm2 25793; SSE3-NEXT: psrlw $2, %xmm0 25794; SSE3-NEXT: pand %xmm1, %xmm0 25795; SSE3-NEXT: paddb %xmm2, %xmm0 25796; SSE3-NEXT: movdqa %xmm0, %xmm1 25797; SSE3-NEXT: psrlw $4, %xmm1 25798; SSE3-NEXT: paddb %xmm0, %xmm1 25799; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25800; SSE3-NEXT: pxor %xmm0, %xmm0 25801; SSE3-NEXT: psadbw %xmm1, %xmm0 25802; SSE3-NEXT: por {{.*}}(%rip), %xmm0 25803; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] 25804; SSE3-NEXT: movdqa %xmm0, %xmm2 25805; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 25806; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25807; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 25808; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25809; SSE3-NEXT: pand %xmm3, %xmm1 25810; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25811; SSE3-NEXT: por %xmm1, %xmm0 25812; SSE3-NEXT: retq 25813; 25814; SSSE3-LABEL: ugt_25_v2i64: 25815; SSSE3: # %bb.0: 25816; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25817; SSSE3-NEXT: movdqa %xmm0, %xmm2 25818; SSSE3-NEXT: pand %xmm1, %xmm2 25819; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25820; SSSE3-NEXT: movdqa %xmm3, %xmm4 25821; SSSE3-NEXT: pshufb %xmm2, %xmm4 25822; SSSE3-NEXT: psrlw $4, %xmm0 25823; SSSE3-NEXT: pand %xmm1, %xmm0 25824; SSSE3-NEXT: pshufb %xmm0, %xmm3 25825; SSSE3-NEXT: paddb %xmm4, %xmm3 25826; SSSE3-NEXT: pxor %xmm0, %xmm0 25827; SSSE3-NEXT: psadbw %xmm3, %xmm0 25828; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 25829; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483673,2147483673] 25830; SSSE3-NEXT: movdqa %xmm0, %xmm2 25831; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 25832; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25833; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 25834; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25835; SSSE3-NEXT: pand %xmm3, %xmm1 25836; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25837; SSSE3-NEXT: por %xmm1, %xmm0 25838; SSSE3-NEXT: retq 25839; 25840; SSE41-LABEL: ugt_25_v2i64: 25841; SSE41: # %bb.0: 25842; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25843; SSE41-NEXT: movdqa %xmm0, %xmm2 25844; SSE41-NEXT: pand %xmm1, %xmm2 25845; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25846; SSE41-NEXT: movdqa %xmm3, %xmm4 25847; SSE41-NEXT: pshufb %xmm2, %xmm4 25848; SSE41-NEXT: psrlw $4, %xmm0 25849; SSE41-NEXT: pand %xmm1, %xmm0 25850; SSE41-NEXT: pshufb %xmm0, %xmm3 25851; SSE41-NEXT: paddb %xmm4, %xmm3 25852; SSE41-NEXT: pxor %xmm0, %xmm0 25853; SSE41-NEXT: psadbw %xmm3, %xmm0 25854; SSE41-NEXT: por {{.*}}(%rip), %xmm0 25855; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] 25856; SSE41-NEXT: movdqa %xmm0, %xmm2 25857; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 25858; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25859; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 25860; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25861; SSE41-NEXT: pand %xmm3, %xmm1 25862; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25863; SSE41-NEXT: por %xmm1, %xmm0 25864; SSE41-NEXT: retq 25865; 25866; AVX1-LABEL: ugt_25_v2i64: 25867; AVX1: # %bb.0: 25868; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25869; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 25870; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25871; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25872; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 25873; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 25874; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25875; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 25876; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 25877; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25878; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25879; AVX1-NEXT: retq 25880; 25881; AVX2-LABEL: ugt_25_v2i64: 25882; AVX2: # %bb.0: 25883; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 25884; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 25885; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 25886; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 25887; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 25888; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 25889; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 25890; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 25891; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 25892; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25893; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25894; AVX2-NEXT: retq 25895; 25896; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64: 25897; AVX512VPOPCNTDQ: # %bb.0: 25898; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25899; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 25900; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25901; AVX512VPOPCNTDQ-NEXT: vzeroupper 25902; AVX512VPOPCNTDQ-NEXT: retq 25903; 25904; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64: 25905; AVX512VPOPCNTDQVL: # %bb.0: 25906; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 25907; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 25908; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25909; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25910; AVX512VPOPCNTDQVL-NEXT: retq 25911; 25912; BITALG_NOVLX-LABEL: ugt_25_v2i64: 25913; BITALG_NOVLX: # %bb.0: 25914; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 25915; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 25916; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 25917; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25918; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 25919; BITALG_NOVLX-NEXT: vzeroupper 25920; BITALG_NOVLX-NEXT: retq 
25921; 25922; BITALG-LABEL: ugt_25_v2i64: 25923; BITALG: # %bb.0: 25924; BITALG-NEXT: vpopcntb %xmm0, %xmm0 25925; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 25926; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 25927; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 25928; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 25929; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 25930; BITALG-NEXT: retq 25931 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 25932 %3 = icmp ugt <2 x i64> %2, <i64 25, i64 25> 25933 %4 = sext <2 x i1> %3 to <2 x i64> 25934 ret <2 x i64> %4 25935} 25936 25937define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { 25938; SSE2-LABEL: ult_26_v2i64: 25939; SSE2: # %bb.0: 25940; SSE2-NEXT: movdqa %xmm0, %xmm1 25941; SSE2-NEXT: psrlw $1, %xmm1 25942; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25943; SSE2-NEXT: psubb %xmm1, %xmm0 25944; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25945; SSE2-NEXT: movdqa %xmm0, %xmm2 25946; SSE2-NEXT: pand %xmm1, %xmm2 25947; SSE2-NEXT: psrlw $2, %xmm0 25948; SSE2-NEXT: pand %xmm1, %xmm0 25949; SSE2-NEXT: paddb %xmm2, %xmm0 25950; SSE2-NEXT: movdqa %xmm0, %xmm1 25951; SSE2-NEXT: psrlw $4, %xmm1 25952; SSE2-NEXT: paddb %xmm0, %xmm1 25953; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 25954; SSE2-NEXT: pxor %xmm0, %xmm0 25955; SSE2-NEXT: psadbw %xmm1, %xmm0 25956; SSE2-NEXT: por {{.*}}(%rip), %xmm0 25957; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 25958; SSE2-NEXT: movdqa %xmm1, %xmm2 25959; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 25960; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25961; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 25962; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25963; SSE2-NEXT: pand %xmm3, %xmm1 25964; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25965; SSE2-NEXT: por %xmm1, %xmm0 25966; SSE2-NEXT: retq 25967; 25968; SSE3-LABEL: ult_26_v2i64: 25969; SSE3: # %bb.0: 25970; SSE3-NEXT: movdqa %xmm0, %xmm1 25971; SSE3-NEXT: psrlw $1, %xmm1 25972; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25973; 
SSE3-NEXT: psubb %xmm1, %xmm0 25974; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 25975; SSE3-NEXT: movdqa %xmm0, %xmm2 25976; SSE3-NEXT: pand %xmm1, %xmm2 25977; SSE3-NEXT: psrlw $2, %xmm0 25978; SSE3-NEXT: pand %xmm1, %xmm0 25979; SSE3-NEXT: paddb %xmm2, %xmm0 25980; SSE3-NEXT: movdqa %xmm0, %xmm1 25981; SSE3-NEXT: psrlw $4, %xmm1 25982; SSE3-NEXT: paddb %xmm0, %xmm1 25983; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 25984; SSE3-NEXT: pxor %xmm0, %xmm0 25985; SSE3-NEXT: psadbw %xmm1, %xmm0 25986; SSE3-NEXT: por {{.*}}(%rip), %xmm0 25987; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 25988; SSE3-NEXT: movdqa %xmm1, %xmm2 25989; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 25990; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 25991; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 25992; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 25993; SSE3-NEXT: pand %xmm3, %xmm1 25994; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 25995; SSE3-NEXT: por %xmm1, %xmm0 25996; SSE3-NEXT: retq 25997; 25998; SSSE3-LABEL: ult_26_v2i64: 25999; SSSE3: # %bb.0: 26000; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26001; SSSE3-NEXT: movdqa %xmm0, %xmm2 26002; SSSE3-NEXT: pand %xmm1, %xmm2 26003; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26004; SSSE3-NEXT: movdqa %xmm3, %xmm4 26005; SSSE3-NEXT: pshufb %xmm2, %xmm4 26006; SSSE3-NEXT: psrlw $4, %xmm0 26007; SSSE3-NEXT: pand %xmm1, %xmm0 26008; SSSE3-NEXT: pshufb %xmm0, %xmm3 26009; SSSE3-NEXT: paddb %xmm4, %xmm3 26010; SSSE3-NEXT: pxor %xmm0, %xmm0 26011; SSSE3-NEXT: psadbw %xmm3, %xmm0 26012; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 26013; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 26014; SSSE3-NEXT: movdqa %xmm1, %xmm2 26015; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 26016; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26017; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 26018; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26019; SSSE3-NEXT: pand %xmm3, %xmm1 
26020; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26021; SSSE3-NEXT: por %xmm1, %xmm0 26022; SSSE3-NEXT: retq 26023; 26024; SSE41-LABEL: ult_26_v2i64: 26025; SSE41: # %bb.0: 26026; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26027; SSE41-NEXT: movdqa %xmm0, %xmm2 26028; SSE41-NEXT: pand %xmm1, %xmm2 26029; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26030; SSE41-NEXT: movdqa %xmm3, %xmm4 26031; SSE41-NEXT: pshufb %xmm2, %xmm4 26032; SSE41-NEXT: psrlw $4, %xmm0 26033; SSE41-NEXT: pand %xmm1, %xmm0 26034; SSE41-NEXT: pshufb %xmm0, %xmm3 26035; SSE41-NEXT: paddb %xmm4, %xmm3 26036; SSE41-NEXT: pxor %xmm0, %xmm0 26037; SSE41-NEXT: psadbw %xmm3, %xmm0 26038; SSE41-NEXT: por {{.*}}(%rip), %xmm0 26039; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 26040; SSE41-NEXT: movdqa %xmm1, %xmm2 26041; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 26042; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26043; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 26044; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26045; SSE41-NEXT: pand %xmm3, %xmm1 26046; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26047; SSE41-NEXT: por %xmm1, %xmm0 26048; SSE41-NEXT: retq 26049; 26050; AVX1-LABEL: ult_26_v2i64: 26051; AVX1: # %bb.0: 26052; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26053; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 26054; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26055; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26056; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 26057; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 26058; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26059; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26060; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 26061; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26062; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] 26063; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26064; AVX1-NEXT: retq 26065; 26066; AVX2-LABEL: ult_26_v2i64: 26067; AVX2: # %bb.0: 26068; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26069; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 26070; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26071; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26072; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 26073; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 26074; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26075; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26076; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 26077; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26078; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] 26079; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26080; AVX2-NEXT: retq 26081; 26082; AVX512VPOPCNTDQ-LABEL: ult_26_v2i64: 26083; AVX512VPOPCNTDQ: # %bb.0: 26084; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26085; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 26086; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] 26087; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26088; AVX512VPOPCNTDQ-NEXT: vzeroupper 26089; AVX512VPOPCNTDQ-NEXT: retq 26090; 26091; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64: 26092; AVX512VPOPCNTDQVL: # %bb.0: 26093; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 26094; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 26095; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26096; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26097; AVX512VPOPCNTDQVL-NEXT: retq 26098; 26099; BITALG_NOVLX-LABEL: ult_26_v2i64: 26100; BITALG_NOVLX: # %bb.0: 26101; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26102; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 26103; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 26104; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26105; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] 26106; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26107; BITALG_NOVLX-NEXT: vzeroupper 26108; BITALG_NOVLX-NEXT: retq 26109; 26110; BITALG-LABEL: ult_26_v2i64: 26111; BITALG: # %bb.0: 26112; BITALG-NEXT: vpopcntb %xmm0, %xmm0 26113; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 26114; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26115; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 26116; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26117; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26118; BITALG-NEXT: retq 26119 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 26120 %3 = icmp ult <2 x i64> %2, <i64 26, i64 26> 26121 %4 = sext <2 x i1> %3 to <2 x i64> 26122 ret <2 x i64> %4 26123} 26124 26125define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { 26126; SSE2-LABEL: ugt_26_v2i64: 26127; SSE2: # %bb.0: 26128; SSE2-NEXT: movdqa %xmm0, %xmm1 26129; SSE2-NEXT: psrlw $1, %xmm1 26130; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26131; SSE2-NEXT: psubb %xmm1, %xmm0 26132; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26133; SSE2-NEXT: movdqa %xmm0, %xmm2 26134; SSE2-NEXT: pand %xmm1, %xmm2 26135; SSE2-NEXT: psrlw $2, %xmm0 26136; SSE2-NEXT: pand %xmm1, %xmm0 26137; SSE2-NEXT: paddb %xmm2, %xmm0 26138; SSE2-NEXT: movdqa %xmm0, %xmm1 26139; SSE2-NEXT: psrlw $4, %xmm1 26140; SSE2-NEXT: paddb %xmm0, %xmm1 26141; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26142; SSE2-NEXT: pxor %xmm0, %xmm0 26143; SSE2-NEXT: psadbw %xmm1, %xmm0 26144; SSE2-NEXT: por {{.*}}(%rip), %xmm0 26145; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 26146; SSE2-NEXT: movdqa %xmm0, %xmm2 26147; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 26148; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26149; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 26150; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26151; SSE2-NEXT: pand %xmm3, %xmm1 26152; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26153; SSE2-NEXT: por %xmm1, %xmm0 26154; SSE2-NEXT: retq 26155; 26156; SSE3-LABEL: ugt_26_v2i64: 26157; SSE3: # %bb.0: 26158; SSE3-NEXT: movdqa %xmm0, %xmm1 26159; SSE3-NEXT: psrlw $1, %xmm1 26160; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26161; SSE3-NEXT: psubb %xmm1, %xmm0 26162; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26163; SSE3-NEXT: movdqa %xmm0, %xmm2 26164; SSE3-NEXT: pand %xmm1, %xmm2 26165; SSE3-NEXT: psrlw $2, %xmm0 26166; SSE3-NEXT: pand %xmm1, %xmm0 26167; SSE3-NEXT: paddb %xmm2, %xmm0 26168; SSE3-NEXT: movdqa %xmm0, %xmm1 26169; SSE3-NEXT: psrlw $4, %xmm1 26170; SSE3-NEXT: paddb %xmm0, %xmm1 26171; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26172; SSE3-NEXT: pxor %xmm0, %xmm0 26173; SSE3-NEXT: psadbw %xmm1, %xmm0 26174; SSE3-NEXT: por {{.*}}(%rip), %xmm0 26175; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 26176; SSE3-NEXT: movdqa %xmm0, %xmm2 26177; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 26178; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26179; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 26180; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26181; SSE3-NEXT: pand %xmm3, %xmm1 26182; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26183; SSE3-NEXT: por %xmm1, %xmm0 26184; SSE3-NEXT: retq 26185; 26186; SSSE3-LABEL: ugt_26_v2i64: 26187; SSSE3: # %bb.0: 26188; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26189; SSSE3-NEXT: movdqa %xmm0, %xmm2 26190; SSSE3-NEXT: pand %xmm1, %xmm2 26191; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26192; SSSE3-NEXT: movdqa %xmm3, %xmm4 26193; SSSE3-NEXT: pshufb %xmm2, %xmm4 26194; SSSE3-NEXT: psrlw $4, %xmm0 26195; SSSE3-NEXT: pand %xmm1, %xmm0 26196; SSSE3-NEXT: pshufb %xmm0, %xmm3 26197; SSSE3-NEXT: paddb %xmm4, %xmm3 26198; SSSE3-NEXT: pxor %xmm0, %xmm0 26199; SSSE3-NEXT: psadbw %xmm3, %xmm0 26200; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 26201; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 26202; SSSE3-NEXT: movdqa %xmm0, %xmm2 26203; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 26204; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26205; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 26206; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26207; SSSE3-NEXT: pand %xmm3, %xmm1 26208; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26209; 
SSSE3-NEXT: por %xmm1, %xmm0 26210; SSSE3-NEXT: retq 26211; 26212; SSE41-LABEL: ugt_26_v2i64: 26213; SSE41: # %bb.0: 26214; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26215; SSE41-NEXT: movdqa %xmm0, %xmm2 26216; SSE41-NEXT: pand %xmm1, %xmm2 26217; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26218; SSE41-NEXT: movdqa %xmm3, %xmm4 26219; SSE41-NEXT: pshufb %xmm2, %xmm4 26220; SSE41-NEXT: psrlw $4, %xmm0 26221; SSE41-NEXT: pand %xmm1, %xmm0 26222; SSE41-NEXT: pshufb %xmm0, %xmm3 26223; SSE41-NEXT: paddb %xmm4, %xmm3 26224; SSE41-NEXT: pxor %xmm0, %xmm0 26225; SSE41-NEXT: psadbw %xmm3, %xmm0 26226; SSE41-NEXT: por {{.*}}(%rip), %xmm0 26227; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] 26228; SSE41-NEXT: movdqa %xmm0, %xmm2 26229; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 26230; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26231; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 26232; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26233; SSE41-NEXT: pand %xmm3, %xmm1 26234; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26235; SSE41-NEXT: por %xmm1, %xmm0 26236; SSE41-NEXT: retq 26237; 26238; AVX1-LABEL: ugt_26_v2i64: 26239; AVX1: # %bb.0: 26240; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26241; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 26242; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26243; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26244; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 26245; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 26246; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26247; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26248; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 26249; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26250; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26251; AVX1-NEXT: retq 26252; 26253; AVX2-LABEL: ugt_26_v2i64: 26254; AVX2: # %bb.0: 26255; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26256; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 26257; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26258; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26259; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 26260; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 26261; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26262; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26263; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 26264; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26265; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26266; AVX2-NEXT: retq 26267; 26268; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64: 26269; AVX512VPOPCNTDQ: # %bb.0: 26270; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26271; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 26272; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26273; AVX512VPOPCNTDQ-NEXT: vzeroupper 26274; AVX512VPOPCNTDQ-NEXT: retq 26275; 26276; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64: 26277; AVX512VPOPCNTDQVL: # %bb.0: 26278; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 26279; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 26280; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26281; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26282; AVX512VPOPCNTDQVL-NEXT: retq 26283; 26284; BITALG_NOVLX-LABEL: ugt_26_v2i64: 26285; BITALG_NOVLX: # %bb.0: 26286; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26287; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 26288; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 26289; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26290; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26291; BITALG_NOVLX-NEXT: vzeroupper 26292; BITALG_NOVLX-NEXT: retq 26293; 26294; BITALG-LABEL: ugt_26_v2i64: 26295; BITALG: # %bb.0: 26296; BITALG-NEXT: vpopcntb %xmm0, %xmm0 26297; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 26298; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26299; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 26300; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26301; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26302; 
BITALG-NEXT: retq 26303 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 26304 %3 = icmp ugt <2 x i64> %2, <i64 26, i64 26> 26305 %4 = sext <2 x i1> %3 to <2 x i64> 26306 ret <2 x i64> %4 26307} 26308 26309define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { 26310; SSE2-LABEL: ult_27_v2i64: 26311; SSE2: # %bb.0: 26312; SSE2-NEXT: movdqa %xmm0, %xmm1 26313; SSE2-NEXT: psrlw $1, %xmm1 26314; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26315; SSE2-NEXT: psubb %xmm1, %xmm0 26316; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26317; SSE2-NEXT: movdqa %xmm0, %xmm2 26318; SSE2-NEXT: pand %xmm1, %xmm2 26319; SSE2-NEXT: psrlw $2, %xmm0 26320; SSE2-NEXT: pand %xmm1, %xmm0 26321; SSE2-NEXT: paddb %xmm2, %xmm0 26322; SSE2-NEXT: movdqa %xmm0, %xmm1 26323; SSE2-NEXT: psrlw $4, %xmm1 26324; SSE2-NEXT: paddb %xmm0, %xmm1 26325; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26326; SSE2-NEXT: pxor %xmm0, %xmm0 26327; SSE2-NEXT: psadbw %xmm1, %xmm0 26328; SSE2-NEXT: por {{.*}}(%rip), %xmm0 26329; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26330; SSE2-NEXT: movdqa %xmm1, %xmm2 26331; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 26332; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26333; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 26334; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26335; SSE2-NEXT: pand %xmm3, %xmm1 26336; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26337; SSE2-NEXT: por %xmm1, %xmm0 26338; SSE2-NEXT: retq 26339; 26340; SSE3-LABEL: ult_27_v2i64: 26341; SSE3: # %bb.0: 26342; SSE3-NEXT: movdqa %xmm0, %xmm1 26343; SSE3-NEXT: psrlw $1, %xmm1 26344; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26345; SSE3-NEXT: psubb %xmm1, %xmm0 26346; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26347; SSE3-NEXT: movdqa %xmm0, %xmm2 26348; SSE3-NEXT: pand %xmm1, %xmm2 26349; SSE3-NEXT: psrlw $2, %xmm0 26350; SSE3-NEXT: pand %xmm1, %xmm0 26351; SSE3-NEXT: paddb %xmm2, %xmm0 26352; SSE3-NEXT: movdqa %xmm0, %xmm1 26353; SSE3-NEXT: psrlw $4, 
%xmm1 26354; SSE3-NEXT: paddb %xmm0, %xmm1 26355; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26356; SSE3-NEXT: pxor %xmm0, %xmm0 26357; SSE3-NEXT: psadbw %xmm1, %xmm0 26358; SSE3-NEXT: por {{.*}}(%rip), %xmm0 26359; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26360; SSE3-NEXT: movdqa %xmm1, %xmm2 26361; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 26362; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26363; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 26364; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26365; SSE3-NEXT: pand %xmm3, %xmm1 26366; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26367; SSE3-NEXT: por %xmm1, %xmm0 26368; SSE3-NEXT: retq 26369; 26370; SSSE3-LABEL: ult_27_v2i64: 26371; SSSE3: # %bb.0: 26372; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26373; SSSE3-NEXT: movdqa %xmm0, %xmm2 26374; SSSE3-NEXT: pand %xmm1, %xmm2 26375; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26376; SSSE3-NEXT: movdqa %xmm3, %xmm4 26377; SSSE3-NEXT: pshufb %xmm2, %xmm4 26378; SSSE3-NEXT: psrlw $4, %xmm0 26379; SSSE3-NEXT: pand %xmm1, %xmm0 26380; SSSE3-NEXT: pshufb %xmm0, %xmm3 26381; SSSE3-NEXT: paddb %xmm4, %xmm3 26382; SSSE3-NEXT: pxor %xmm0, %xmm0 26383; SSSE3-NEXT: psadbw %xmm3, %xmm0 26384; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 26385; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26386; SSSE3-NEXT: movdqa %xmm1, %xmm2 26387; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 26388; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26389; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 26390; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26391; SSSE3-NEXT: pand %xmm3, %xmm1 26392; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26393; SSSE3-NEXT: por %xmm1, %xmm0 26394; SSSE3-NEXT: retq 26395; 26396; SSE41-LABEL: ult_27_v2i64: 26397; SSE41: # %bb.0: 26398; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26399; SSE41-NEXT: movdqa %xmm0, %xmm2 26400; SSE41-NEXT: pand %xmm1, %xmm2 26401; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26402; SSE41-NEXT: movdqa %xmm3, %xmm4 26403; SSE41-NEXT: pshufb %xmm2, %xmm4 26404; SSE41-NEXT: psrlw $4, %xmm0 26405; SSE41-NEXT: pand %xmm1, %xmm0 26406; SSE41-NEXT: pshufb %xmm0, %xmm3 26407; SSE41-NEXT: paddb %xmm4, %xmm3 26408; SSE41-NEXT: pxor %xmm0, %xmm0 26409; SSE41-NEXT: psadbw %xmm3, %xmm0 26410; SSE41-NEXT: por {{.*}}(%rip), %xmm0 26411; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26412; SSE41-NEXT: movdqa %xmm1, %xmm2 26413; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 26414; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26415; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 26416; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26417; SSE41-NEXT: pand %xmm3, %xmm1 26418; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26419; SSE41-NEXT: por %xmm1, %xmm0 26420; SSE41-NEXT: retq 26421; 26422; AVX1-LABEL: ult_27_v2i64: 26423; AVX1: # %bb.0: 26424; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26425; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 26426; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26427; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26428; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 26429; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 26430; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26431; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26432; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 26433; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26434; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] 26435; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26436; AVX1-NEXT: retq 26437; 26438; AVX2-LABEL: ult_27_v2i64: 26439; AVX2: # %bb.0: 26440; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26441; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 26442; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26443; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26444; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 26445; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 26446; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 26447; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26448; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 26449; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26450; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] 26451; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26452; AVX2-NEXT: retq 26453; 26454; AVX512VPOPCNTDQ-LABEL: ult_27_v2i64: 26455; AVX512VPOPCNTDQ: # %bb.0: 26456; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26457; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 26458; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] 26459; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26460; AVX512VPOPCNTDQ-NEXT: vzeroupper 26461; AVX512VPOPCNTDQ-NEXT: retq 26462; 26463; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64: 26464; AVX512VPOPCNTDQVL: # %bb.0: 26465; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 26466; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 26467; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26468; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26469; AVX512VPOPCNTDQVL-NEXT: retq 26470; 26471; BITALG_NOVLX-LABEL: ult_27_v2i64: 26472; BITALG_NOVLX: # %bb.0: 26473; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26474; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 26475; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 26476; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26477; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] 26478; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26479; BITALG_NOVLX-NEXT: vzeroupper 26480; BITALG_NOVLX-NEXT: retq 26481; 26482; BITALG-LABEL: ult_27_v2i64: 26483; BITALG: # %bb.0: 26484; BITALG-NEXT: vpopcntb %xmm0, %xmm0 26485; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 26486; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26487; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 26488; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26489; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26490; BITALG-NEXT: retq 26491 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 26492 %3 = icmp ult <2 x i64> %2, <i64 
27, i64 27> 26493 %4 = sext <2 x i1> %3 to <2 x i64> 26494 ret <2 x i64> %4 26495} 26496 26497define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { 26498; SSE2-LABEL: ugt_27_v2i64: 26499; SSE2: # %bb.0: 26500; SSE2-NEXT: movdqa %xmm0, %xmm1 26501; SSE2-NEXT: psrlw $1, %xmm1 26502; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26503; SSE2-NEXT: psubb %xmm1, %xmm0 26504; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26505; SSE2-NEXT: movdqa %xmm0, %xmm2 26506; SSE2-NEXT: pand %xmm1, %xmm2 26507; SSE2-NEXT: psrlw $2, %xmm0 26508; SSE2-NEXT: pand %xmm1, %xmm0 26509; SSE2-NEXT: paddb %xmm2, %xmm0 26510; SSE2-NEXT: movdqa %xmm0, %xmm1 26511; SSE2-NEXT: psrlw $4, %xmm1 26512; SSE2-NEXT: paddb %xmm0, %xmm1 26513; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26514; SSE2-NEXT: pxor %xmm0, %xmm0 26515; SSE2-NEXT: psadbw %xmm1, %xmm0 26516; SSE2-NEXT: por {{.*}}(%rip), %xmm0 26517; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26518; SSE2-NEXT: movdqa %xmm0, %xmm2 26519; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 26520; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26521; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 26522; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26523; SSE2-NEXT: pand %xmm3, %xmm1 26524; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26525; SSE2-NEXT: por %xmm1, %xmm0 26526; SSE2-NEXT: retq 26527; 26528; SSE3-LABEL: ugt_27_v2i64: 26529; SSE3: # %bb.0: 26530; SSE3-NEXT: movdqa %xmm0, %xmm1 26531; SSE3-NEXT: psrlw $1, %xmm1 26532; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26533; SSE3-NEXT: psubb %xmm1, %xmm0 26534; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26535; SSE3-NEXT: movdqa %xmm0, %xmm2 26536; SSE3-NEXT: pand %xmm1, %xmm2 26537; SSE3-NEXT: psrlw $2, %xmm0 26538; SSE3-NEXT: pand %xmm1, %xmm0 26539; SSE3-NEXT: paddb %xmm2, %xmm0 26540; SSE3-NEXT: movdqa %xmm0, %xmm1 26541; SSE3-NEXT: psrlw $4, %xmm1 26542; SSE3-NEXT: paddb %xmm0, %xmm1 26543; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26544; SSE3-NEXT: pxor %xmm0, 
%xmm0 26545; SSE3-NEXT: psadbw %xmm1, %xmm0 26546; SSE3-NEXT: por {{.*}}(%rip), %xmm0 26547; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26548; SSE3-NEXT: movdqa %xmm0, %xmm2 26549; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 26550; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26551; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 26552; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26553; SSE3-NEXT: pand %xmm3, %xmm1 26554; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26555; SSE3-NEXT: por %xmm1, %xmm0 26556; SSE3-NEXT: retq 26557; 26558; SSSE3-LABEL: ugt_27_v2i64: 26559; SSSE3: # %bb.0: 26560; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26561; SSSE3-NEXT: movdqa %xmm0, %xmm2 26562; SSSE3-NEXT: pand %xmm1, %xmm2 26563; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26564; SSSE3-NEXT: movdqa %xmm3, %xmm4 26565; SSSE3-NEXT: pshufb %xmm2, %xmm4 26566; SSSE3-NEXT: psrlw $4, %xmm0 26567; SSSE3-NEXT: pand %xmm1, %xmm0 26568; SSSE3-NEXT: pshufb %xmm0, %xmm3 26569; SSSE3-NEXT: paddb %xmm4, %xmm3 26570; SSSE3-NEXT: pxor %xmm0, %xmm0 26571; SSSE3-NEXT: psadbw %xmm3, %xmm0 26572; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 26573; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26574; SSSE3-NEXT: movdqa %xmm0, %xmm2 26575; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 26576; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26577; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 26578; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26579; SSSE3-NEXT: pand %xmm3, %xmm1 26580; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26581; SSSE3-NEXT: por %xmm1, %xmm0 26582; SSSE3-NEXT: retq 26583; 26584; SSE41-LABEL: ugt_27_v2i64: 26585; SSE41: # %bb.0: 26586; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26587; SSE41-NEXT: movdqa %xmm0, %xmm2 26588; SSE41-NEXT: pand %xmm1, %xmm2 26589; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26590; SSE41-NEXT: movdqa %xmm3, %xmm4 26591; SSE41-NEXT: 
pshufb %xmm2, %xmm4 26592; SSE41-NEXT: psrlw $4, %xmm0 26593; SSE41-NEXT: pand %xmm1, %xmm0 26594; SSE41-NEXT: pshufb %xmm0, %xmm3 26595; SSE41-NEXT: paddb %xmm4, %xmm3 26596; SSE41-NEXT: pxor %xmm0, %xmm0 26597; SSE41-NEXT: psadbw %xmm3, %xmm0 26598; SSE41-NEXT: por {{.*}}(%rip), %xmm0 26599; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] 26600; SSE41-NEXT: movdqa %xmm0, %xmm2 26601; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 26602; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26603; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 26604; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26605; SSE41-NEXT: pand %xmm3, %xmm1 26606; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26607; SSE41-NEXT: por %xmm1, %xmm0 26608; SSE41-NEXT: retq 26609; 26610; AVX1-LABEL: ugt_27_v2i64: 26611; AVX1: # %bb.0: 26612; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26613; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 26614; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26615; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26616; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 26617; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 26618; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26619; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26620; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 26621; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26622; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26623; AVX1-NEXT: retq 26624; 26625; AVX2-LABEL: ugt_27_v2i64: 26626; AVX2: # %bb.0: 26627; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26628; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 26629; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26630; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26631; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 26632; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 26633; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26634; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26635; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 26636; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26637; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26638; AVX2-NEXT: retq 26639; 26640; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64: 26641; AVX512VPOPCNTDQ: # %bb.0: 26642; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26643; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 26644; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26645; AVX512VPOPCNTDQ-NEXT: vzeroupper 26646; AVX512VPOPCNTDQ-NEXT: retq 26647; 26648; AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64: 26649; AVX512VPOPCNTDQVL: # %bb.0: 26650; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 26651; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 26652; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26653; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26654; AVX512VPOPCNTDQVL-NEXT: retq 26655; 26656; BITALG_NOVLX-LABEL: ugt_27_v2i64: 26657; BITALG_NOVLX: # %bb.0: 26658; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26659; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 26660; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 26661; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26662; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26663; BITALG_NOVLX-NEXT: vzeroupper 26664; BITALG_NOVLX-NEXT: retq 26665; 26666; BITALG-LABEL: ugt_27_v2i64: 26667; BITALG: # %bb.0: 26668; BITALG-NEXT: vpopcntb %xmm0, %xmm0 26669; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 26670; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26671; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 26672; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26673; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26674; BITALG-NEXT: retq 26675 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 26676 %3 = icmp ugt <2 x i64> %2, <i64 27, i64 27> 26677 %4 = sext <2 x i1> %3 to <2 x i64> 26678 ret <2 x i64> %4 26679} 26680 26681define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { 26682; SSE2-LABEL: ult_28_v2i64: 26683; SSE2: # %bb.0: 26684; SSE2-NEXT: movdqa %xmm0, %xmm1 26685; SSE2-NEXT: psrlw $1, %xmm1 26686; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 26687; SSE2-NEXT: psubb %xmm1, %xmm0 26688; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26689; SSE2-NEXT: movdqa %xmm0, %xmm2 26690; SSE2-NEXT: pand %xmm1, %xmm2 26691; SSE2-NEXT: psrlw $2, %xmm0 26692; SSE2-NEXT: pand %xmm1, %xmm0 26693; SSE2-NEXT: paddb %xmm2, %xmm0 26694; SSE2-NEXT: movdqa %xmm0, %xmm1 26695; SSE2-NEXT: psrlw $4, %xmm1 26696; SSE2-NEXT: paddb %xmm0, %xmm1 26697; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26698; SSE2-NEXT: pxor %xmm0, %xmm0 26699; SSE2-NEXT: psadbw %xmm1, %xmm0 26700; SSE2-NEXT: por {{.*}}(%rip), %xmm0 26701; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26702; SSE2-NEXT: movdqa %xmm1, %xmm2 26703; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 26704; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26705; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 26706; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26707; SSE2-NEXT: pand %xmm3, %xmm1 26708; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26709; SSE2-NEXT: por %xmm1, %xmm0 26710; SSE2-NEXT: retq 26711; 26712; SSE3-LABEL: ult_28_v2i64: 26713; SSE3: # %bb.0: 26714; SSE3-NEXT: movdqa %xmm0, %xmm1 26715; SSE3-NEXT: psrlw $1, %xmm1 26716; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26717; SSE3-NEXT: psubb %xmm1, %xmm0 26718; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26719; SSE3-NEXT: movdqa %xmm0, %xmm2 26720; SSE3-NEXT: pand %xmm1, %xmm2 26721; SSE3-NEXT: psrlw $2, %xmm0 26722; SSE3-NEXT: pand %xmm1, %xmm0 26723; SSE3-NEXT: paddb %xmm2, %xmm0 26724; SSE3-NEXT: movdqa %xmm0, %xmm1 26725; SSE3-NEXT: psrlw $4, %xmm1 26726; SSE3-NEXT: paddb %xmm0, %xmm1 26727; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26728; SSE3-NEXT: pxor %xmm0, %xmm0 26729; SSE3-NEXT: psadbw %xmm1, %xmm0 26730; SSE3-NEXT: por {{.*}}(%rip), %xmm0 26731; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26732; SSE3-NEXT: movdqa %xmm1, %xmm2 26733; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 26734; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26735; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 26736; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26737; SSE3-NEXT: pand %xmm3, %xmm1 26738; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26739; SSE3-NEXT: por %xmm1, %xmm0 26740; SSE3-NEXT: retq 26741; 26742; SSSE3-LABEL: ult_28_v2i64: 26743; SSSE3: # %bb.0: 26744; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26745; SSSE3-NEXT: movdqa %xmm0, %xmm2 26746; SSSE3-NEXT: pand %xmm1, %xmm2 26747; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26748; SSSE3-NEXT: movdqa %xmm3, %xmm4 26749; SSSE3-NEXT: pshufb %xmm2, %xmm4 26750; SSSE3-NEXT: psrlw $4, %xmm0 26751; SSSE3-NEXT: pand %xmm1, %xmm0 26752; SSSE3-NEXT: pshufb %xmm0, %xmm3 26753; SSSE3-NEXT: paddb %xmm4, %xmm3 26754; SSSE3-NEXT: pxor %xmm0, %xmm0 26755; SSSE3-NEXT: psadbw %xmm3, %xmm0 26756; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 26757; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26758; SSSE3-NEXT: movdqa %xmm1, %xmm2 26759; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 26760; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26761; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 26762; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26763; SSSE3-NEXT: pand %xmm3, %xmm1 26764; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26765; SSSE3-NEXT: por %xmm1, %xmm0 26766; SSSE3-NEXT: retq 26767; 26768; SSE41-LABEL: ult_28_v2i64: 26769; SSE41: # %bb.0: 26770; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26771; SSE41-NEXT: movdqa %xmm0, %xmm2 26772; SSE41-NEXT: pand %xmm1, %xmm2 26773; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26774; SSE41-NEXT: movdqa %xmm3, %xmm4 26775; SSE41-NEXT: pshufb %xmm2, %xmm4 26776; SSE41-NEXT: psrlw $4, %xmm0 26777; SSE41-NEXT: pand %xmm1, %xmm0 26778; SSE41-NEXT: pshufb %xmm0, %xmm3 26779; SSE41-NEXT: paddb %xmm4, %xmm3 26780; SSE41-NEXT: pxor %xmm0, %xmm0 26781; SSE41-NEXT: psadbw %xmm3, %xmm0 26782; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
26783; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26784; SSE41-NEXT: movdqa %xmm1, %xmm2 26785; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 26786; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26787; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 26788; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26789; SSE41-NEXT: pand %xmm3, %xmm1 26790; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26791; SSE41-NEXT: por %xmm1, %xmm0 26792; SSE41-NEXT: retq 26793; 26794; AVX1-LABEL: ult_28_v2i64: 26795; AVX1: # %bb.0: 26796; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26797; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 26798; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26799; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26800; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 26801; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 26802; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26803; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26804; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 26805; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26806; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] 26807; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26808; AVX1-NEXT: retq 26809; 26810; AVX2-LABEL: ult_28_v2i64: 26811; AVX2: # %bb.0: 26812; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26813; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 26814; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26815; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26816; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 26817; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 26818; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26819; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26820; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 26821; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26822; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] 26823; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26824; AVX2-NEXT: retq 26825; 26826; AVX512VPOPCNTDQ-LABEL: ult_28_v2i64: 26827; AVX512VPOPCNTDQ: # %bb.0: 26828; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 26829; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 26830; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] 26831; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26832; AVX512VPOPCNTDQ-NEXT: vzeroupper 26833; AVX512VPOPCNTDQ-NEXT: retq 26834; 26835; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64: 26836; AVX512VPOPCNTDQVL: # %bb.0: 26837; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 26838; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 26839; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26840; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26841; AVX512VPOPCNTDQVL-NEXT: retq 26842; 26843; BITALG_NOVLX-LABEL: ult_28_v2i64: 26844; BITALG_NOVLX: # %bb.0: 26845; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 26846; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 26847; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 26848; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26849; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] 26850; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 26851; BITALG_NOVLX-NEXT: vzeroupper 26852; BITALG_NOVLX-NEXT: retq 26853; 26854; BITALG-LABEL: ult_28_v2i64: 26855; BITALG: # %bb.0: 26856; BITALG-NEXT: vpopcntb %xmm0, %xmm0 26857; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 26858; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26859; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 26860; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 26861; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 26862; BITALG-NEXT: retq 26863 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 26864 %3 = icmp ult <2 x i64> %2, <i64 28, i64 28> 26865 %4 = sext <2 x i1> %3 to <2 x i64> 26866 ret <2 x i64> %4 26867} 26868 26869define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { 26870; SSE2-LABEL: ugt_28_v2i64: 26871; SSE2: # %bb.0: 26872; SSE2-NEXT: movdqa %xmm0, %xmm1 26873; SSE2-NEXT: psrlw $1, %xmm1 26874; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26875; SSE2-NEXT: psubb %xmm1, %xmm0 26876; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26877; SSE2-NEXT: movdqa %xmm0, %xmm2 26878; SSE2-NEXT: pand %xmm1, %xmm2 26879; SSE2-NEXT: psrlw $2, %xmm0 26880; SSE2-NEXT: pand %xmm1, %xmm0 26881; SSE2-NEXT: paddb %xmm2, %xmm0 26882; SSE2-NEXT: movdqa %xmm0, %xmm1 26883; SSE2-NEXT: psrlw $4, %xmm1 26884; SSE2-NEXT: paddb %xmm0, %xmm1 26885; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 26886; SSE2-NEXT: pxor %xmm0, %xmm0 26887; SSE2-NEXT: psadbw %xmm1, %xmm0 26888; SSE2-NEXT: por {{.*}}(%rip), %xmm0 26889; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26890; SSE2-NEXT: movdqa %xmm0, %xmm2 26891; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 26892; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26893; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 26894; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26895; SSE2-NEXT: pand %xmm3, %xmm1 26896; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26897; SSE2-NEXT: por %xmm1, %xmm0 26898; SSE2-NEXT: retq 26899; 26900; SSE3-LABEL: ugt_28_v2i64: 26901; SSE3: # %bb.0: 26902; SSE3-NEXT: movdqa %xmm0, %xmm1 26903; SSE3-NEXT: psrlw $1, %xmm1 26904; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26905; SSE3-NEXT: psubb %xmm1, %xmm0 26906; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 26907; SSE3-NEXT: movdqa %xmm0, %xmm2 26908; SSE3-NEXT: pand %xmm1, %xmm2 26909; SSE3-NEXT: psrlw $2, %xmm0 26910; SSE3-NEXT: pand %xmm1, %xmm0 26911; SSE3-NEXT: paddb %xmm2, %xmm0 26912; SSE3-NEXT: movdqa %xmm0, %xmm1 26913; SSE3-NEXT: psrlw $4, %xmm1 26914; SSE3-NEXT: paddb %xmm0, %xmm1 26915; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 26916; SSE3-NEXT: pxor %xmm0, %xmm0 26917; SSE3-NEXT: psadbw %xmm1, %xmm0 26918; SSE3-NEXT: por {{.*}}(%rip), %xmm0 26919; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26920; SSE3-NEXT: movdqa %xmm0, %xmm2 26921; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 26922; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26923; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 26924; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26925; 
SSE3-NEXT: pand %xmm3, %xmm1 26926; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26927; SSE3-NEXT: por %xmm1, %xmm0 26928; SSE3-NEXT: retq 26929; 26930; SSSE3-LABEL: ugt_28_v2i64: 26931; SSSE3: # %bb.0: 26932; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26933; SSSE3-NEXT: movdqa %xmm0, %xmm2 26934; SSSE3-NEXT: pand %xmm1, %xmm2 26935; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26936; SSSE3-NEXT: movdqa %xmm3, %xmm4 26937; SSSE3-NEXT: pshufb %xmm2, %xmm4 26938; SSSE3-NEXT: psrlw $4, %xmm0 26939; SSSE3-NEXT: pand %xmm1, %xmm0 26940; SSSE3-NEXT: pshufb %xmm0, %xmm3 26941; SSSE3-NEXT: paddb %xmm4, %xmm3 26942; SSSE3-NEXT: pxor %xmm0, %xmm0 26943; SSSE3-NEXT: psadbw %xmm3, %xmm0 26944; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 26945; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26946; SSSE3-NEXT: movdqa %xmm0, %xmm2 26947; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 26948; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26949; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 26950; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26951; SSSE3-NEXT: pand %xmm3, %xmm1 26952; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26953; SSSE3-NEXT: por %xmm1, %xmm0 26954; SSSE3-NEXT: retq 26955; 26956; SSE41-LABEL: ugt_28_v2i64: 26957; SSE41: # %bb.0: 26958; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26959; SSE41-NEXT: movdqa %xmm0, %xmm2 26960; SSE41-NEXT: pand %xmm1, %xmm2 26961; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26962; SSE41-NEXT: movdqa %xmm3, %xmm4 26963; SSE41-NEXT: pshufb %xmm2, %xmm4 26964; SSE41-NEXT: psrlw $4, %xmm0 26965; SSE41-NEXT: pand %xmm1, %xmm0 26966; SSE41-NEXT: pshufb %xmm0, %xmm3 26967; SSE41-NEXT: paddb %xmm4, %xmm3 26968; SSE41-NEXT: pxor %xmm0, %xmm0 26969; SSE41-NEXT: psadbw %xmm3, %xmm0 26970; SSE41-NEXT: por {{.*}}(%rip), %xmm0 26971; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] 26972; SSE41-NEXT: movdqa %xmm0, 
%xmm2 26973; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 26974; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 26975; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 26976; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 26977; SSE41-NEXT: pand %xmm3, %xmm1 26978; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 26979; SSE41-NEXT: por %xmm1, %xmm0 26980; SSE41-NEXT: retq 26981; 26982; AVX1-LABEL: ugt_28_v2i64: 26983; AVX1: # %bb.0: 26984; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 26985; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 26986; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 26987; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 26988; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 26989; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 26990; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 26991; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 26992; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 26993; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 26994; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 26995; AVX1-NEXT: retq 26996; 26997; AVX2-LABEL: ugt_28_v2i64: 26998; AVX2: # %bb.0: 26999; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27000; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 27001; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27002; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27003; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 27004; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 27005; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27006; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27007; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 27008; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27009; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27010; AVX2-NEXT: retq 27011; 27012; AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64: 27013; AVX512VPOPCNTDQ: # %bb.0: 27014; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27015; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 27016; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27017; AVX512VPOPCNTDQ-NEXT: vzeroupper 27018; 
AVX512VPOPCNTDQ-NEXT: retq 27019; 27020; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64: 27021; AVX512VPOPCNTDQVL: # %bb.0: 27022; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 27023; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 27024; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27025; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27026; AVX512VPOPCNTDQVL-NEXT: retq 27027; 27028; BITALG_NOVLX-LABEL: ugt_28_v2i64: 27029; BITALG_NOVLX: # %bb.0: 27030; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27031; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 27032; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 27033; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27034; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27035; BITALG_NOVLX-NEXT: vzeroupper 27036; BITALG_NOVLX-NEXT: retq 27037; 27038; BITALG-LABEL: ugt_28_v2i64: 27039; BITALG: # %bb.0: 27040; BITALG-NEXT: vpopcntb %xmm0, %xmm0 27041; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 27042; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27043; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 27044; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27045; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27046; BITALG-NEXT: retq 27047 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 27048 %3 = icmp ugt <2 x i64> %2, <i64 28, i64 28> 27049 %4 = sext <2 x i1> %3 to <2 x i64> 27050 ret <2 x i64> %4 27051} 27052 27053define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { 27054; SSE2-LABEL: ult_29_v2i64: 27055; SSE2: # %bb.0: 27056; SSE2-NEXT: movdqa %xmm0, %xmm1 27057; SSE2-NEXT: psrlw $1, %xmm1 27058; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27059; SSE2-NEXT: psubb %xmm1, %xmm0 27060; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27061; SSE2-NEXT: movdqa %xmm0, %xmm2 27062; SSE2-NEXT: pand %xmm1, %xmm2 27063; SSE2-NEXT: psrlw $2, %xmm0 27064; SSE2-NEXT: pand %xmm1, %xmm0 27065; SSE2-NEXT: paddb %xmm2, %xmm0 27066; SSE2-NEXT: movdqa %xmm0, %xmm1 27067; SSE2-NEXT: psrlw $4, 
%xmm1 27068; SSE2-NEXT: paddb %xmm0, %xmm1 27069; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27070; SSE2-NEXT: pxor %xmm0, %xmm0 27071; SSE2-NEXT: psadbw %xmm1, %xmm0 27072; SSE2-NEXT: por {{.*}}(%rip), %xmm0 27073; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27074; SSE2-NEXT: movdqa %xmm1, %xmm2 27075; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 27076; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27077; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 27078; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27079; SSE2-NEXT: pand %xmm3, %xmm1 27080; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27081; SSE2-NEXT: por %xmm1, %xmm0 27082; SSE2-NEXT: retq 27083; 27084; SSE3-LABEL: ult_29_v2i64: 27085; SSE3: # %bb.0: 27086; SSE3-NEXT: movdqa %xmm0, %xmm1 27087; SSE3-NEXT: psrlw $1, %xmm1 27088; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27089; SSE3-NEXT: psubb %xmm1, %xmm0 27090; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27091; SSE3-NEXT: movdqa %xmm0, %xmm2 27092; SSE3-NEXT: pand %xmm1, %xmm2 27093; SSE3-NEXT: psrlw $2, %xmm0 27094; SSE3-NEXT: pand %xmm1, %xmm0 27095; SSE3-NEXT: paddb %xmm2, %xmm0 27096; SSE3-NEXT: movdqa %xmm0, %xmm1 27097; SSE3-NEXT: psrlw $4, %xmm1 27098; SSE3-NEXT: paddb %xmm0, %xmm1 27099; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27100; SSE3-NEXT: pxor %xmm0, %xmm0 27101; SSE3-NEXT: psadbw %xmm1, %xmm0 27102; SSE3-NEXT: por {{.*}}(%rip), %xmm0 27103; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27104; SSE3-NEXT: movdqa %xmm1, %xmm2 27105; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 27106; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27107; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 27108; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27109; SSE3-NEXT: pand %xmm3, %xmm1 27110; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27111; SSE3-NEXT: por %xmm1, %xmm0 27112; SSE3-NEXT: retq 27113; 27114; SSSE3-LABEL: ult_29_v2i64: 27115; SSSE3: # %bb.0: 27116; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27117; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 27118; SSSE3-NEXT: pand %xmm1, %xmm2 27119; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27120; SSSE3-NEXT: movdqa %xmm3, %xmm4 27121; SSSE3-NEXT: pshufb %xmm2, %xmm4 27122; SSSE3-NEXT: psrlw $4, %xmm0 27123; SSSE3-NEXT: pand %xmm1, %xmm0 27124; SSSE3-NEXT: pshufb %xmm0, %xmm3 27125; SSSE3-NEXT: paddb %xmm4, %xmm3 27126; SSSE3-NEXT: pxor %xmm0, %xmm0 27127; SSSE3-NEXT: psadbw %xmm3, %xmm0 27128; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 27129; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27130; SSSE3-NEXT: movdqa %xmm1, %xmm2 27131; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 27132; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27133; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 27134; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27135; SSSE3-NEXT: pand %xmm3, %xmm1 27136; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27137; SSSE3-NEXT: por %xmm1, %xmm0 27138; SSSE3-NEXT: retq 27139; 27140; SSE41-LABEL: ult_29_v2i64: 27141; SSE41: # %bb.0: 27142; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27143; SSE41-NEXT: movdqa %xmm0, %xmm2 27144; SSE41-NEXT: pand %xmm1, %xmm2 27145; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27146; SSE41-NEXT: movdqa %xmm3, %xmm4 27147; SSE41-NEXT: pshufb %xmm2, %xmm4 27148; SSE41-NEXT: psrlw $4, %xmm0 27149; SSE41-NEXT: pand %xmm1, %xmm0 27150; SSE41-NEXT: pshufb %xmm0, %xmm3 27151; SSE41-NEXT: paddb %xmm4, %xmm3 27152; SSE41-NEXT: pxor %xmm0, %xmm0 27153; SSE41-NEXT: psadbw %xmm3, %xmm0 27154; SSE41-NEXT: por {{.*}}(%rip), %xmm0 27155; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27156; SSE41-NEXT: movdqa %xmm1, %xmm2 27157; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 27158; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27159; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 27160; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27161; SSE41-NEXT: pand %xmm3, %xmm1 27162; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27163; 
SSE41-NEXT: por %xmm1, %xmm0 27164; SSE41-NEXT: retq 27165; 27166; AVX1-LABEL: ult_29_v2i64: 27167; AVX1: # %bb.0: 27168; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27169; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 27170; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27171; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27172; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 27173; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 27174; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27175; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27176; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 27177; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27178; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] 27179; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27180; AVX1-NEXT: retq 27181; 27182; AVX2-LABEL: ult_29_v2i64: 27183; AVX2: # %bb.0: 27184; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27185; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 27186; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27187; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27188; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 27189; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 27190; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27191; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27192; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 27193; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27194; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] 27195; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27196; AVX2-NEXT: retq 27197; 27198; AVX512VPOPCNTDQ-LABEL: ult_29_v2i64: 27199; AVX512VPOPCNTDQ: # %bb.0: 27200; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27201; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 27202; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] 27203; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27204; AVX512VPOPCNTDQ-NEXT: vzeroupper 27205; AVX512VPOPCNTDQ-NEXT: retq 27206; 27207; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64: 27208; AVX512VPOPCNTDQVL: # %bb.0: 27209; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 27210; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 27211; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27212; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27213; AVX512VPOPCNTDQVL-NEXT: retq 27214; 27215; BITALG_NOVLX-LABEL: ult_29_v2i64: 27216; BITALG_NOVLX: # %bb.0: 27217; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27218; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 27219; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 27220; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27221; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] 27222; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27223; BITALG_NOVLX-NEXT: vzeroupper 27224; BITALG_NOVLX-NEXT: retq 27225; 27226; BITALG-LABEL: ult_29_v2i64: 27227; BITALG: # %bb.0: 27228; BITALG-NEXT: vpopcntb %xmm0, %xmm0 27229; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 27230; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27231; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 27232; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27233; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27234; BITALG-NEXT: retq 27235 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 27236 %3 = icmp ult <2 x i64> %2, <i64 29, i64 29> 27237 %4 = sext <2 x i1> %3 to <2 x i64> 27238 ret <2 x i64> %4 27239} 27240 27241define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { 27242; SSE2-LABEL: ugt_29_v2i64: 27243; SSE2: # %bb.0: 27244; SSE2-NEXT: movdqa %xmm0, %xmm1 27245; SSE2-NEXT: psrlw $1, %xmm1 27246; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27247; SSE2-NEXT: psubb %xmm1, %xmm0 27248; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27249; SSE2-NEXT: movdqa %xmm0, %xmm2 27250; SSE2-NEXT: pand %xmm1, %xmm2 27251; SSE2-NEXT: psrlw $2, %xmm0 27252; SSE2-NEXT: pand %xmm1, %xmm0 27253; SSE2-NEXT: paddb %xmm2, %xmm0 27254; SSE2-NEXT: movdqa %xmm0, %xmm1 27255; SSE2-NEXT: psrlw $4, %xmm1 27256; SSE2-NEXT: paddb %xmm0, %xmm1 27257; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27258; SSE2-NEXT: pxor %xmm0, 
%xmm0 27259; SSE2-NEXT: psadbw %xmm1, %xmm0 27260; SSE2-NEXT: por {{.*}}(%rip), %xmm0 27261; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27262; SSE2-NEXT: movdqa %xmm0, %xmm2 27263; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 27264; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27265; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 27266; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27267; SSE2-NEXT: pand %xmm3, %xmm1 27268; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27269; SSE2-NEXT: por %xmm1, %xmm0 27270; SSE2-NEXT: retq 27271; 27272; SSE3-LABEL: ugt_29_v2i64: 27273; SSE3: # %bb.0: 27274; SSE3-NEXT: movdqa %xmm0, %xmm1 27275; SSE3-NEXT: psrlw $1, %xmm1 27276; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27277; SSE3-NEXT: psubb %xmm1, %xmm0 27278; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27279; SSE3-NEXT: movdqa %xmm0, %xmm2 27280; SSE3-NEXT: pand %xmm1, %xmm2 27281; SSE3-NEXT: psrlw $2, %xmm0 27282; SSE3-NEXT: pand %xmm1, %xmm0 27283; SSE3-NEXT: paddb %xmm2, %xmm0 27284; SSE3-NEXT: movdqa %xmm0, %xmm1 27285; SSE3-NEXT: psrlw $4, %xmm1 27286; SSE3-NEXT: paddb %xmm0, %xmm1 27287; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27288; SSE3-NEXT: pxor %xmm0, %xmm0 27289; SSE3-NEXT: psadbw %xmm1, %xmm0 27290; SSE3-NEXT: por {{.*}}(%rip), %xmm0 27291; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27292; SSE3-NEXT: movdqa %xmm0, %xmm2 27293; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 27294; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27295; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 27296; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27297; SSE3-NEXT: pand %xmm3, %xmm1 27298; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27299; SSE3-NEXT: por %xmm1, %xmm0 27300; SSE3-NEXT: retq 27301; 27302; SSSE3-LABEL: ugt_29_v2i64: 27303; SSSE3: # %bb.0: 27304; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27305; SSSE3-NEXT: movdqa %xmm0, %xmm2 27306; SSSE3-NEXT: pand %xmm1, %xmm2 27307; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27308; SSSE3-NEXT: movdqa %xmm3, %xmm4 27309; SSSE3-NEXT: pshufb %xmm2, %xmm4 27310; SSSE3-NEXT: psrlw $4, %xmm0 27311; SSSE3-NEXT: pand %xmm1, %xmm0 27312; SSSE3-NEXT: pshufb %xmm0, %xmm3 27313; SSSE3-NEXT: paddb %xmm4, %xmm3 27314; SSSE3-NEXT: pxor %xmm0, %xmm0 27315; SSSE3-NEXT: psadbw %xmm3, %xmm0 27316; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 27317; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27318; SSSE3-NEXT: movdqa %xmm0, %xmm2 27319; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 27320; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27321; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 27322; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27323; SSSE3-NEXT: pand %xmm3, %xmm1 27324; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27325; SSSE3-NEXT: por %xmm1, %xmm0 27326; SSSE3-NEXT: retq 27327; 27328; SSE41-LABEL: ugt_29_v2i64: 27329; SSE41: # %bb.0: 27330; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27331; SSE41-NEXT: movdqa %xmm0, %xmm2 27332; SSE41-NEXT: pand %xmm1, %xmm2 27333; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27334; SSE41-NEXT: movdqa %xmm3, %xmm4 27335; SSE41-NEXT: pshufb %xmm2, %xmm4 27336; SSE41-NEXT: psrlw $4, %xmm0 27337; SSE41-NEXT: pand %xmm1, %xmm0 27338; SSE41-NEXT: pshufb %xmm0, %xmm3 27339; SSE41-NEXT: paddb %xmm4, %xmm3 27340; SSE41-NEXT: pxor %xmm0, %xmm0 27341; SSE41-NEXT: psadbw %xmm3, %xmm0 27342; SSE41-NEXT: por {{.*}}(%rip), %xmm0 27343; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] 27344; SSE41-NEXT: movdqa %xmm0, %xmm2 27345; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 27346; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27347; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 27348; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27349; SSE41-NEXT: pand %xmm3, %xmm1 27350; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27351; SSE41-NEXT: por %xmm1, %xmm0 27352; SSE41-NEXT: retq 27353; 27354; AVX1-LABEL: ugt_29_v2i64: 27355; AVX1: # %bb.0: 
27356; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27357; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 27358; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27359; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27360; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 27361; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 27362; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27363; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27364; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 27365; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27366; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27367; AVX1-NEXT: retq 27368; 27369; AVX2-LABEL: ugt_29_v2i64: 27370; AVX2: # %bb.0: 27371; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27372; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 27373; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27374; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27375; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 27376; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 27377; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27378; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27379; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 27380; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27381; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27382; AVX2-NEXT: retq 27383; 27384; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64: 27385; AVX512VPOPCNTDQ: # %bb.0: 27386; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27387; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 27388; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27389; AVX512VPOPCNTDQ-NEXT: vzeroupper 27390; AVX512VPOPCNTDQ-NEXT: retq 27391; 27392; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64: 27393; AVX512VPOPCNTDQVL: # %bb.0: 27394; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 27395; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 27396; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27397; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27398; AVX512VPOPCNTDQVL-NEXT: retq 27399; 27400; 
BITALG_NOVLX-LABEL: ugt_29_v2i64: 27401; BITALG_NOVLX: # %bb.0: 27402; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27403; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 27404; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 27405; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27406; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27407; BITALG_NOVLX-NEXT: vzeroupper 27408; BITALG_NOVLX-NEXT: retq 27409; 27410; BITALG-LABEL: ugt_29_v2i64: 27411; BITALG: # %bb.0: 27412; BITALG-NEXT: vpopcntb %xmm0, %xmm0 27413; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 27414; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27415; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 27416; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27417; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27418; BITALG-NEXT: retq 27419 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 27420 %3 = icmp ugt <2 x i64> %2, <i64 29, i64 29> 27421 %4 = sext <2 x i1> %3 to <2 x i64> 27422 ret <2 x i64> %4 27423} 27424 27425define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { 27426; SSE2-LABEL: ult_30_v2i64: 27427; SSE2: # %bb.0: 27428; SSE2-NEXT: movdqa %xmm0, %xmm1 27429; SSE2-NEXT: psrlw $1, %xmm1 27430; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27431; SSE2-NEXT: psubb %xmm1, %xmm0 27432; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27433; SSE2-NEXT: movdqa %xmm0, %xmm2 27434; SSE2-NEXT: pand %xmm1, %xmm2 27435; SSE2-NEXT: psrlw $2, %xmm0 27436; SSE2-NEXT: pand %xmm1, %xmm0 27437; SSE2-NEXT: paddb %xmm2, %xmm0 27438; SSE2-NEXT: movdqa %xmm0, %xmm1 27439; SSE2-NEXT: psrlw $4, %xmm1 27440; SSE2-NEXT: paddb %xmm0, %xmm1 27441; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27442; SSE2-NEXT: pxor %xmm0, %xmm0 27443; SSE2-NEXT: psadbw %xmm1, %xmm0 27444; SSE2-NEXT: por {{.*}}(%rip), %xmm0 27445; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] 27446; SSE2-NEXT: movdqa %xmm1, %xmm2 27447; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 27448; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27449; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 27450; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27451; SSE2-NEXT: pand %xmm3, %xmm1 27452; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27453; SSE2-NEXT: por %xmm1, %xmm0 27454; SSE2-NEXT: retq 27455; 27456; SSE3-LABEL: ult_30_v2i64: 27457; SSE3: # %bb.0: 27458; SSE3-NEXT: movdqa %xmm0, %xmm1 27459; SSE3-NEXT: psrlw $1, %xmm1 27460; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27461; SSE3-NEXT: psubb %xmm1, %xmm0 27462; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27463; SSE3-NEXT: movdqa %xmm0, %xmm2 27464; SSE3-NEXT: pand %xmm1, %xmm2 27465; SSE3-NEXT: psrlw $2, %xmm0 27466; SSE3-NEXT: pand %xmm1, %xmm0 27467; SSE3-NEXT: paddb %xmm2, %xmm0 27468; SSE3-NEXT: movdqa %xmm0, %xmm1 27469; SSE3-NEXT: psrlw $4, %xmm1 27470; SSE3-NEXT: paddb %xmm0, %xmm1 27471; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27472; SSE3-NEXT: pxor %xmm0, %xmm0 27473; SSE3-NEXT: psadbw %xmm1, %xmm0 27474; SSE3-NEXT: por {{.*}}(%rip), %xmm0 27475; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] 27476; SSE3-NEXT: movdqa %xmm1, %xmm2 27477; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 27478; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27479; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 27480; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27481; SSE3-NEXT: pand %xmm3, %xmm1 27482; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27483; SSE3-NEXT: por %xmm1, %xmm0 27484; SSE3-NEXT: retq 27485; 27486; SSSE3-LABEL: ult_30_v2i64: 27487; SSSE3: # %bb.0: 27488; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27489; SSSE3-NEXT: movdqa %xmm0, %xmm2 27490; SSSE3-NEXT: pand %xmm1, %xmm2 27491; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27492; SSSE3-NEXT: movdqa %xmm3, %xmm4 27493; SSSE3-NEXT: pshufb %xmm2, %xmm4 27494; SSSE3-NEXT: psrlw $4, %xmm0 27495; SSSE3-NEXT: pand %xmm1, %xmm0 27496; SSSE3-NEXT: pshufb %xmm0, %xmm3 27497; SSSE3-NEXT: paddb %xmm4, %xmm3 27498; SSSE3-NEXT: pxor %xmm0, 
%xmm0 27499; SSSE3-NEXT: psadbw %xmm3, %xmm0 27500; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 27501; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] 27502; SSSE3-NEXT: movdqa %xmm1, %xmm2 27503; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 27504; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27505; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 27506; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27507; SSSE3-NEXT: pand %xmm3, %xmm1 27508; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27509; SSSE3-NEXT: por %xmm1, %xmm0 27510; SSSE3-NEXT: retq 27511; 27512; SSE41-LABEL: ult_30_v2i64: 27513; SSE41: # %bb.0: 27514; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27515; SSE41-NEXT: movdqa %xmm0, %xmm2 27516; SSE41-NEXT: pand %xmm1, %xmm2 27517; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27518; SSE41-NEXT: movdqa %xmm3, %xmm4 27519; SSE41-NEXT: pshufb %xmm2, %xmm4 27520; SSE41-NEXT: psrlw $4, %xmm0 27521; SSE41-NEXT: pand %xmm1, %xmm0 27522; SSE41-NEXT: pshufb %xmm0, %xmm3 27523; SSE41-NEXT: paddb %xmm4, %xmm3 27524; SSE41-NEXT: pxor %xmm0, %xmm0 27525; SSE41-NEXT: psadbw %xmm3, %xmm0 27526; SSE41-NEXT: por {{.*}}(%rip), %xmm0 27527; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] 27528; SSE41-NEXT: movdqa %xmm1, %xmm2 27529; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 27530; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27531; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 27532; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27533; SSE41-NEXT: pand %xmm3, %xmm1 27534; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27535; SSE41-NEXT: por %xmm1, %xmm0 27536; SSE41-NEXT: retq 27537; 27538; AVX1-LABEL: ult_30_v2i64: 27539; AVX1: # %bb.0: 27540; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27541; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 27542; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27543; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27544; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 27545; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 27546; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27547; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27548; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 27549; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27550; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] 27551; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27552; AVX1-NEXT: retq 27553; 27554; AVX2-LABEL: ult_30_v2i64: 27555; AVX2: # %bb.0: 27556; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27557; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 27558; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27559; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27560; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 27561; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 27562; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27563; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27564; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 27565; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27566; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] 27567; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27568; AVX2-NEXT: retq 27569; 27570; AVX512VPOPCNTDQ-LABEL: ult_30_v2i64: 27571; AVX512VPOPCNTDQ: # %bb.0: 27572; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27573; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 27574; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] 27575; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27576; AVX512VPOPCNTDQ-NEXT: vzeroupper 27577; AVX512VPOPCNTDQ-NEXT: retq 27578; 27579; AVX512VPOPCNTDQVL-LABEL: ult_30_v2i64: 27580; AVX512VPOPCNTDQVL: # %bb.0: 27581; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 27582; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 27583; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27584; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27585; AVX512VPOPCNTDQVL-NEXT: retq 27586; 27587; BITALG_NOVLX-LABEL: ult_30_v2i64: 27588; BITALG_NOVLX: # %bb.0: 27589; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27590; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 27591; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 27592; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27593; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] 27594; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27595; BITALG_NOVLX-NEXT: vzeroupper 27596; BITALG_NOVLX-NEXT: retq 27597; 27598; BITALG-LABEL: ult_30_v2i64: 27599; BITALG: # %bb.0: 27600; BITALG-NEXT: vpopcntb %xmm0, %xmm0 27601; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 27602; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27603; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 27604; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27605; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27606; BITALG-NEXT: retq 27607 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 27608 %3 = icmp ult <2 x i64> %2, <i64 30, i64 30> 27609 %4 = sext <2 x i1> %3 to <2 x i64> 27610 ret <2 x i64> %4 27611} 27612 27613define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { 27614; SSE2-LABEL: ugt_30_v2i64: 27615; SSE2: # %bb.0: 27616; SSE2-NEXT: movdqa %xmm0, %xmm1 27617; SSE2-NEXT: psrlw $1, %xmm1 27618; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27619; SSE2-NEXT: psubb %xmm1, %xmm0 27620; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27621; SSE2-NEXT: movdqa %xmm0, %xmm2 27622; SSE2-NEXT: pand %xmm1, %xmm2 27623; SSE2-NEXT: psrlw $2, %xmm0 27624; SSE2-NEXT: pand %xmm1, %xmm0 27625; SSE2-NEXT: paddb %xmm2, %xmm0 27626; SSE2-NEXT: movdqa %xmm0, %xmm1 27627; SSE2-NEXT: psrlw $4, %xmm1 27628; SSE2-NEXT: paddb %xmm0, %xmm1 27629; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27630; SSE2-NEXT: pxor %xmm0, %xmm0 27631; SSE2-NEXT: psadbw %xmm1, %xmm0 27632; SSE2-NEXT: por {{.*}}(%rip), %xmm0 27633; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] 27634; SSE2-NEXT: movdqa %xmm0, %xmm2 27635; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 27636; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27637; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 27638; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27639; SSE2-NEXT: pand 
%xmm3, %xmm1 27640; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27641; SSE2-NEXT: por %xmm1, %xmm0 27642; SSE2-NEXT: retq 27643; 27644; SSE3-LABEL: ugt_30_v2i64: 27645; SSE3: # %bb.0: 27646; SSE3-NEXT: movdqa %xmm0, %xmm1 27647; SSE3-NEXT: psrlw $1, %xmm1 27648; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27649; SSE3-NEXT: psubb %xmm1, %xmm0 27650; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27651; SSE3-NEXT: movdqa %xmm0, %xmm2 27652; SSE3-NEXT: pand %xmm1, %xmm2 27653; SSE3-NEXT: psrlw $2, %xmm0 27654; SSE3-NEXT: pand %xmm1, %xmm0 27655; SSE3-NEXT: paddb %xmm2, %xmm0 27656; SSE3-NEXT: movdqa %xmm0, %xmm1 27657; SSE3-NEXT: psrlw $4, %xmm1 27658; SSE3-NEXT: paddb %xmm0, %xmm1 27659; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27660; SSE3-NEXT: pxor %xmm0, %xmm0 27661; SSE3-NEXT: psadbw %xmm1, %xmm0 27662; SSE3-NEXT: por {{.*}}(%rip), %xmm0 27663; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] 27664; SSE3-NEXT: movdqa %xmm0, %xmm2 27665; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 27666; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27667; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 27668; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27669; SSE3-NEXT: pand %xmm3, %xmm1 27670; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27671; SSE3-NEXT: por %xmm1, %xmm0 27672; SSE3-NEXT: retq 27673; 27674; SSSE3-LABEL: ugt_30_v2i64: 27675; SSSE3: # %bb.0: 27676; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27677; SSSE3-NEXT: movdqa %xmm0, %xmm2 27678; SSSE3-NEXT: pand %xmm1, %xmm2 27679; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27680; SSSE3-NEXT: movdqa %xmm3, %xmm4 27681; SSSE3-NEXT: pshufb %xmm2, %xmm4 27682; SSSE3-NEXT: psrlw $4, %xmm0 27683; SSSE3-NEXT: pand %xmm1, %xmm0 27684; SSSE3-NEXT: pshufb %xmm0, %xmm3 27685; SSSE3-NEXT: paddb %xmm4, %xmm3 27686; SSSE3-NEXT: pxor %xmm0, %xmm0 27687; SSSE3-NEXT: psadbw %xmm3, %xmm0 27688; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 27689; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483678,2147483678] 27690; SSSE3-NEXT: movdqa %xmm0, %xmm2 27691; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 27692; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27693; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 27694; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27695; SSSE3-NEXT: pand %xmm3, %xmm1 27696; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27697; SSSE3-NEXT: por %xmm1, %xmm0 27698; SSSE3-NEXT: retq 27699; 27700; SSE41-LABEL: ugt_30_v2i64: 27701; SSE41: # %bb.0: 27702; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27703; SSE41-NEXT: movdqa %xmm0, %xmm2 27704; SSE41-NEXT: pand %xmm1, %xmm2 27705; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27706; SSE41-NEXT: movdqa %xmm3, %xmm4 27707; SSE41-NEXT: pshufb %xmm2, %xmm4 27708; SSE41-NEXT: psrlw $4, %xmm0 27709; SSE41-NEXT: pand %xmm1, %xmm0 27710; SSE41-NEXT: pshufb %xmm0, %xmm3 27711; SSE41-NEXT: paddb %xmm4, %xmm3 27712; SSE41-NEXT: pxor %xmm0, %xmm0 27713; SSE41-NEXT: psadbw %xmm3, %xmm0 27714; SSE41-NEXT: por {{.*}}(%rip), %xmm0 27715; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] 27716; SSE41-NEXT: movdqa %xmm0, %xmm2 27717; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 27718; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27719; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 27720; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27721; SSE41-NEXT: pand %xmm3, %xmm1 27722; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27723; SSE41-NEXT: por %xmm1, %xmm0 27724; SSE41-NEXT: retq 27725; 27726; AVX1-LABEL: ugt_30_v2i64: 27727; AVX1: # %bb.0: 27728; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27729; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 27730; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27731; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27732; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 27733; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 27734; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27735; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 27736; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 27737; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27738; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27739; AVX1-NEXT: retq 27740; 27741; AVX2-LABEL: ugt_30_v2i64: 27742; AVX2: # %bb.0: 27743; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27744; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 27745; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27746; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27747; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 27748; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 27749; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27750; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27751; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 27752; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27753; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27754; AVX2-NEXT: retq 27755; 27756; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64: 27757; AVX512VPOPCNTDQ: # %bb.0: 27758; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27759; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 27760; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27761; AVX512VPOPCNTDQ-NEXT: vzeroupper 27762; AVX512VPOPCNTDQ-NEXT: retq 27763; 27764; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64: 27765; AVX512VPOPCNTDQVL: # %bb.0: 27766; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 27767; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 27768; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27769; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27770; AVX512VPOPCNTDQVL-NEXT: retq 27771; 27772; BITALG_NOVLX-LABEL: ugt_30_v2i64: 27773; BITALG_NOVLX: # %bb.0: 27774; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27775; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 27776; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 27777; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27778; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 27779; BITALG_NOVLX-NEXT: vzeroupper 27780; BITALG_NOVLX-NEXT: retq 
27781; 27782; BITALG-LABEL: ugt_30_v2i64: 27783; BITALG: # %bb.0: 27784; BITALG-NEXT: vpopcntb %xmm0, %xmm0 27785; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 27786; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27787; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 27788; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27789; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27790; BITALG-NEXT: retq 27791 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 27792 %3 = icmp ugt <2 x i64> %2, <i64 30, i64 30> 27793 %4 = sext <2 x i1> %3 to <2 x i64> 27794 ret <2 x i64> %4 27795} 27796 27797define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { 27798; SSE2-LABEL: ult_31_v2i64: 27799; SSE2: # %bb.0: 27800; SSE2-NEXT: movdqa %xmm0, %xmm1 27801; SSE2-NEXT: psrlw $1, %xmm1 27802; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27803; SSE2-NEXT: psubb %xmm1, %xmm0 27804; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27805; SSE2-NEXT: movdqa %xmm0, %xmm2 27806; SSE2-NEXT: pand %xmm1, %xmm2 27807; SSE2-NEXT: psrlw $2, %xmm0 27808; SSE2-NEXT: pand %xmm1, %xmm0 27809; SSE2-NEXT: paddb %xmm2, %xmm0 27810; SSE2-NEXT: movdqa %xmm0, %xmm1 27811; SSE2-NEXT: psrlw $4, %xmm1 27812; SSE2-NEXT: paddb %xmm0, %xmm1 27813; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27814; SSE2-NEXT: pxor %xmm0, %xmm0 27815; SSE2-NEXT: psadbw %xmm1, %xmm0 27816; SSE2-NEXT: por {{.*}}(%rip), %xmm0 27817; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 27818; SSE2-NEXT: movdqa %xmm1, %xmm2 27819; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 27820; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27821; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 27822; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27823; SSE2-NEXT: pand %xmm3, %xmm1 27824; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27825; SSE2-NEXT: por %xmm1, %xmm0 27826; SSE2-NEXT: retq 27827; 27828; SSE3-LABEL: ult_31_v2i64: 27829; SSE3: # %bb.0: 27830; SSE3-NEXT: movdqa %xmm0, %xmm1 27831; SSE3-NEXT: psrlw $1, %xmm1 27832; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27833; 
SSE3-NEXT: psubb %xmm1, %xmm0 27834; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27835; SSE3-NEXT: movdqa %xmm0, %xmm2 27836; SSE3-NEXT: pand %xmm1, %xmm2 27837; SSE3-NEXT: psrlw $2, %xmm0 27838; SSE3-NEXT: pand %xmm1, %xmm0 27839; SSE3-NEXT: paddb %xmm2, %xmm0 27840; SSE3-NEXT: movdqa %xmm0, %xmm1 27841; SSE3-NEXT: psrlw $4, %xmm1 27842; SSE3-NEXT: paddb %xmm0, %xmm1 27843; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 27844; SSE3-NEXT: pxor %xmm0, %xmm0 27845; SSE3-NEXT: psadbw %xmm1, %xmm0 27846; SSE3-NEXT: por {{.*}}(%rip), %xmm0 27847; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 27848; SSE3-NEXT: movdqa %xmm1, %xmm2 27849; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 27850; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27851; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 27852; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27853; SSE3-NEXT: pand %xmm3, %xmm1 27854; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27855; SSE3-NEXT: por %xmm1, %xmm0 27856; SSE3-NEXT: retq 27857; 27858; SSSE3-LABEL: ult_31_v2i64: 27859; SSSE3: # %bb.0: 27860; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27861; SSSE3-NEXT: movdqa %xmm0, %xmm2 27862; SSSE3-NEXT: pand %xmm1, %xmm2 27863; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27864; SSSE3-NEXT: movdqa %xmm3, %xmm4 27865; SSSE3-NEXT: pshufb %xmm2, %xmm4 27866; SSSE3-NEXT: psrlw $4, %xmm0 27867; SSSE3-NEXT: pand %xmm1, %xmm0 27868; SSSE3-NEXT: pshufb %xmm0, %xmm3 27869; SSSE3-NEXT: paddb %xmm4, %xmm3 27870; SSSE3-NEXT: pxor %xmm0, %xmm0 27871; SSSE3-NEXT: psadbw %xmm3, %xmm0 27872; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 27873; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 27874; SSSE3-NEXT: movdqa %xmm1, %xmm2 27875; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 27876; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27877; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 27878; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27879; SSSE3-NEXT: pand %xmm3, %xmm1 
27880; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27881; SSSE3-NEXT: por %xmm1, %xmm0 27882; SSSE3-NEXT: retq 27883; 27884; SSE41-LABEL: ult_31_v2i64: 27885; SSE41: # %bb.0: 27886; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27887; SSE41-NEXT: movdqa %xmm0, %xmm2 27888; SSE41-NEXT: pand %xmm1, %xmm2 27889; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27890; SSE41-NEXT: movdqa %xmm3, %xmm4 27891; SSE41-NEXT: pshufb %xmm2, %xmm4 27892; SSE41-NEXT: psrlw $4, %xmm0 27893; SSE41-NEXT: pand %xmm1, %xmm0 27894; SSE41-NEXT: pshufb %xmm0, %xmm3 27895; SSE41-NEXT: paddb %xmm4, %xmm3 27896; SSE41-NEXT: pxor %xmm0, %xmm0 27897; SSE41-NEXT: psadbw %xmm3, %xmm0 27898; SSE41-NEXT: por {{.*}}(%rip), %xmm0 27899; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 27900; SSE41-NEXT: movdqa %xmm1, %xmm2 27901; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 27902; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 27903; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 27904; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 27905; SSE41-NEXT: pand %xmm3, %xmm1 27906; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 27907; SSE41-NEXT: por %xmm1, %xmm0 27908; SSE41-NEXT: retq 27909; 27910; AVX1-LABEL: ult_31_v2i64: 27911; AVX1: # %bb.0: 27912; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27913; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 27914; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27915; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27916; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 27917; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 27918; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27919; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27920; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 27921; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27922; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] 27923; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27924; AVX1-NEXT: retq 27925; 27926; AVX2-LABEL: ult_31_v2i64: 27927; AVX2: # %bb.0: 27928; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 27929; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 27930; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 27931; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 27932; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 27933; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 27934; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 27935; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 27936; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 27937; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27938; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] 27939; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27940; AVX2-NEXT: retq 27941; 27942; AVX512VPOPCNTDQ-LABEL: ult_31_v2i64: 27943; AVX512VPOPCNTDQ: # %bb.0: 27944; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27945; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 27946; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] 27947; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27948; AVX512VPOPCNTDQ-NEXT: vzeroupper 27949; AVX512VPOPCNTDQ-NEXT: retq 27950; 27951; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64: 27952; AVX512VPOPCNTDQVL: # %bb.0: 27953; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 27954; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 27955; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27956; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27957; AVX512VPOPCNTDQVL-NEXT: retq 27958; 27959; BITALG_NOVLX-LABEL: ult_31_v2i64: 27960; BITALG_NOVLX: # %bb.0: 27961; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 27962; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 27963; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 27964; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27965; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] 27966; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 27967; BITALG_NOVLX-NEXT: vzeroupper 27968; BITALG_NOVLX-NEXT: retq 27969; 27970; BITALG-LABEL: ult_31_v2i64: 27971; BITALG: # %bb.0: 27972; BITALG-NEXT: vpopcntb %xmm0, %xmm0 27973; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 27974; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 27975; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 27976; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 27977; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 27978; BITALG-NEXT: retq 27979 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 27980 %3 = icmp ult <2 x i64> %2, <i64 31, i64 31> 27981 %4 = sext <2 x i1> %3 to <2 x i64> 27982 ret <2 x i64> %4 27983} 27984 27985define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { 27986; SSE2-LABEL: ugt_31_v2i64: 27987; SSE2: # %bb.0: 27988; SSE2-NEXT: movdqa %xmm0, %xmm1 27989; SSE2-NEXT: psrlw $1, %xmm1 27990; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 27991; SSE2-NEXT: psubb %xmm1, %xmm0 27992; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 27993; SSE2-NEXT: movdqa %xmm0, %xmm2 27994; SSE2-NEXT: pand %xmm1, %xmm2 27995; SSE2-NEXT: psrlw $2, %xmm0 27996; SSE2-NEXT: pand %xmm1, %xmm0 27997; SSE2-NEXT: paddb %xmm2, %xmm0 27998; SSE2-NEXT: movdqa %xmm0, %xmm1 27999; SSE2-NEXT: psrlw $4, %xmm1 28000; SSE2-NEXT: paddb %xmm0, %xmm1 28001; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28002; SSE2-NEXT: pxor %xmm0, %xmm0 28003; SSE2-NEXT: psadbw %xmm1, %xmm0 28004; SSE2-NEXT: por {{.*}}(%rip), %xmm0 28005; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 28006; SSE2-NEXT: movdqa %xmm0, %xmm2 28007; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 28008; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28009; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 28010; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28011; SSE2-NEXT: pand %xmm3, %xmm1 28012; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28013; SSE2-NEXT: por %xmm1, %xmm0 28014; SSE2-NEXT: retq 28015; 28016; SSE3-LABEL: ugt_31_v2i64: 28017; SSE3: # %bb.0: 28018; SSE3-NEXT: movdqa %xmm0, %xmm1 28019; SSE3-NEXT: psrlw $1, %xmm1 28020; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28021; SSE3-NEXT: psubb %xmm1, %xmm0 28022; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28023; SSE3-NEXT: movdqa %xmm0, %xmm2 28024; SSE3-NEXT: pand %xmm1, %xmm2 28025; SSE3-NEXT: psrlw $2, %xmm0 28026; SSE3-NEXT: pand %xmm1, %xmm0 28027; SSE3-NEXT: paddb %xmm2, %xmm0 28028; SSE3-NEXT: movdqa %xmm0, %xmm1 28029; SSE3-NEXT: psrlw $4, %xmm1 28030; SSE3-NEXT: paddb %xmm0, %xmm1 28031; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28032; SSE3-NEXT: pxor %xmm0, %xmm0 28033; SSE3-NEXT: psadbw %xmm1, %xmm0 28034; SSE3-NEXT: por {{.*}}(%rip), %xmm0 28035; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 28036; SSE3-NEXT: movdqa %xmm0, %xmm2 28037; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 28038; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28039; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 28040; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28041; SSE3-NEXT: pand %xmm3, %xmm1 28042; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28043; SSE3-NEXT: por %xmm1, %xmm0 28044; SSE3-NEXT: retq 28045; 28046; SSSE3-LABEL: ugt_31_v2i64: 28047; SSSE3: # %bb.0: 28048; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28049; SSSE3-NEXT: movdqa %xmm0, %xmm2 28050; SSSE3-NEXT: pand %xmm1, %xmm2 28051; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28052; SSSE3-NEXT: movdqa %xmm3, %xmm4 28053; SSSE3-NEXT: pshufb %xmm2, %xmm4 28054; SSSE3-NEXT: psrlw $4, %xmm0 28055; SSSE3-NEXT: pand %xmm1, %xmm0 28056; SSSE3-NEXT: pshufb %xmm0, %xmm3 28057; SSSE3-NEXT: paddb %xmm4, %xmm3 28058; SSSE3-NEXT: pxor %xmm0, %xmm0 28059; SSSE3-NEXT: psadbw %xmm3, %xmm0 28060; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 28061; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 28062; SSSE3-NEXT: movdqa %xmm0, %xmm2 28063; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 28064; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28065; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 28066; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28067; SSSE3-NEXT: pand %xmm3, %xmm1 28068; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28069; 
SSSE3-NEXT: por %xmm1, %xmm0 28070; SSSE3-NEXT: retq 28071; 28072; SSE41-LABEL: ugt_31_v2i64: 28073; SSE41: # %bb.0: 28074; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28075; SSE41-NEXT: movdqa %xmm0, %xmm2 28076; SSE41-NEXT: pand %xmm1, %xmm2 28077; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28078; SSE41-NEXT: movdqa %xmm3, %xmm4 28079; SSE41-NEXT: pshufb %xmm2, %xmm4 28080; SSE41-NEXT: psrlw $4, %xmm0 28081; SSE41-NEXT: pand %xmm1, %xmm0 28082; SSE41-NEXT: pshufb %xmm0, %xmm3 28083; SSE41-NEXT: paddb %xmm4, %xmm3 28084; SSE41-NEXT: pxor %xmm0, %xmm0 28085; SSE41-NEXT: psadbw %xmm3, %xmm0 28086; SSE41-NEXT: por {{.*}}(%rip), %xmm0 28087; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] 28088; SSE41-NEXT: movdqa %xmm0, %xmm2 28089; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 28090; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28091; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 28092; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28093; SSE41-NEXT: pand %xmm3, %xmm1 28094; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28095; SSE41-NEXT: por %xmm1, %xmm0 28096; SSE41-NEXT: retq 28097; 28098; AVX1-LABEL: ugt_31_v2i64: 28099; AVX1: # %bb.0: 28100; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28101; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 28102; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28103; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28104; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 28105; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 28106; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28107; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28108; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 28109; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28110; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28111; AVX1-NEXT: retq 28112; 28113; AVX2-LABEL: ugt_31_v2i64: 28114; AVX2: # %bb.0: 28115; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28116; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 28117; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28118; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28119; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 28120; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 28121; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28122; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28123; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 28124; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28125; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28126; AVX2-NEXT: retq 28127; 28128; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64: 28129; AVX512VPOPCNTDQ: # %bb.0: 28130; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28131; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 28132; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28133; AVX512VPOPCNTDQ-NEXT: vzeroupper 28134; AVX512VPOPCNTDQ-NEXT: retq 28135; 28136; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64: 28137; AVX512VPOPCNTDQVL: # %bb.0: 28138; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 28139; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 28140; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28141; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28142; AVX512VPOPCNTDQVL-NEXT: retq 28143; 28144; BITALG_NOVLX-LABEL: ugt_31_v2i64: 28145; BITALG_NOVLX: # %bb.0: 28146; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28147; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 28148; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 28149; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28150; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28151; BITALG_NOVLX-NEXT: vzeroupper 28152; BITALG_NOVLX-NEXT: retq 28153; 28154; BITALG-LABEL: ugt_31_v2i64: 28155; BITALG: # %bb.0: 28156; BITALG-NEXT: vpopcntb %xmm0, %xmm0 28157; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 28158; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28159; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 28160; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28161; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28162; 
BITALG-NEXT: retq 28163 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 28164 %3 = icmp ugt <2 x i64> %2, <i64 31, i64 31> 28165 %4 = sext <2 x i1> %3 to <2 x i64> 28166 ret <2 x i64> %4 28167} 28168 28169define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { 28170; SSE2-LABEL: ult_32_v2i64: 28171; SSE2: # %bb.0: 28172; SSE2-NEXT: movdqa %xmm0, %xmm1 28173; SSE2-NEXT: psrlw $1, %xmm1 28174; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28175; SSE2-NEXT: psubb %xmm1, %xmm0 28176; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28177; SSE2-NEXT: movdqa %xmm0, %xmm2 28178; SSE2-NEXT: pand %xmm1, %xmm2 28179; SSE2-NEXT: psrlw $2, %xmm0 28180; SSE2-NEXT: pand %xmm1, %xmm0 28181; SSE2-NEXT: paddb %xmm2, %xmm0 28182; SSE2-NEXT: movdqa %xmm0, %xmm1 28183; SSE2-NEXT: psrlw $4, %xmm1 28184; SSE2-NEXT: paddb %xmm0, %xmm1 28185; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28186; SSE2-NEXT: pxor %xmm0, %xmm0 28187; SSE2-NEXT: psadbw %xmm1, %xmm0 28188; SSE2-NEXT: por {{.*}}(%rip), %xmm0 28189; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28190; SSE2-NEXT: movdqa %xmm1, %xmm2 28191; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 28192; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28193; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 28194; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28195; SSE2-NEXT: pand %xmm3, %xmm1 28196; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28197; SSE2-NEXT: por %xmm1, %xmm0 28198; SSE2-NEXT: retq 28199; 28200; SSE3-LABEL: ult_32_v2i64: 28201; SSE3: # %bb.0: 28202; SSE3-NEXT: movdqa %xmm0, %xmm1 28203; SSE3-NEXT: psrlw $1, %xmm1 28204; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28205; SSE3-NEXT: psubb %xmm1, %xmm0 28206; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28207; SSE3-NEXT: movdqa %xmm0, %xmm2 28208; SSE3-NEXT: pand %xmm1, %xmm2 28209; SSE3-NEXT: psrlw $2, %xmm0 28210; SSE3-NEXT: pand %xmm1, %xmm0 28211; SSE3-NEXT: paddb %xmm2, %xmm0 28212; SSE3-NEXT: movdqa %xmm0, %xmm1 28213; SSE3-NEXT: psrlw $4, 
%xmm1 28214; SSE3-NEXT: paddb %xmm0, %xmm1 28215; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28216; SSE3-NEXT: pxor %xmm0, %xmm0 28217; SSE3-NEXT: psadbw %xmm1, %xmm0 28218; SSE3-NEXT: por {{.*}}(%rip), %xmm0 28219; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28220; SSE3-NEXT: movdqa %xmm1, %xmm2 28221; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 28222; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28223; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 28224; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28225; SSE3-NEXT: pand %xmm3, %xmm1 28226; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28227; SSE3-NEXT: por %xmm1, %xmm0 28228; SSE3-NEXT: retq 28229; 28230; SSSE3-LABEL: ult_32_v2i64: 28231; SSSE3: # %bb.0: 28232; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28233; SSSE3-NEXT: movdqa %xmm0, %xmm2 28234; SSSE3-NEXT: pand %xmm1, %xmm2 28235; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28236; SSSE3-NEXT: movdqa %xmm3, %xmm4 28237; SSSE3-NEXT: pshufb %xmm2, %xmm4 28238; SSSE3-NEXT: psrlw $4, %xmm0 28239; SSSE3-NEXT: pand %xmm1, %xmm0 28240; SSSE3-NEXT: pshufb %xmm0, %xmm3 28241; SSSE3-NEXT: paddb %xmm4, %xmm3 28242; SSSE3-NEXT: pxor %xmm0, %xmm0 28243; SSSE3-NEXT: psadbw %xmm3, %xmm0 28244; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 28245; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28246; SSSE3-NEXT: movdqa %xmm1, %xmm2 28247; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 28248; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28249; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 28250; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28251; SSSE3-NEXT: pand %xmm3, %xmm1 28252; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28253; SSSE3-NEXT: por %xmm1, %xmm0 28254; SSSE3-NEXT: retq 28255; 28256; SSE41-LABEL: ult_32_v2i64: 28257; SSE41: # %bb.0: 28258; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28259; SSE41-NEXT: movdqa %xmm0, %xmm2 28260; SSE41-NEXT: pand %xmm1, %xmm2 28261; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28262; SSE41-NEXT: movdqa %xmm3, %xmm4 28263; SSE41-NEXT: pshufb %xmm2, %xmm4 28264; SSE41-NEXT: psrlw $4, %xmm0 28265; SSE41-NEXT: pand %xmm1, %xmm0 28266; SSE41-NEXT: pshufb %xmm0, %xmm3 28267; SSE41-NEXT: paddb %xmm4, %xmm3 28268; SSE41-NEXT: pxor %xmm0, %xmm0 28269; SSE41-NEXT: psadbw %xmm3, %xmm0 28270; SSE41-NEXT: por {{.*}}(%rip), %xmm0 28271; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28272; SSE41-NEXT: movdqa %xmm1, %xmm2 28273; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 28274; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28275; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 28276; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28277; SSE41-NEXT: pand %xmm3, %xmm1 28278; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28279; SSE41-NEXT: por %xmm1, %xmm0 28280; SSE41-NEXT: retq 28281; 28282; AVX1-LABEL: ult_32_v2i64: 28283; AVX1: # %bb.0: 28284; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28285; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 28286; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28287; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28288; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 28289; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 28290; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28291; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28292; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 28293; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28294; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] 28295; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28296; AVX1-NEXT: retq 28297; 28298; AVX2-LABEL: ult_32_v2i64: 28299; AVX2: # %bb.0: 28300; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28301; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 28302; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28303; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28304; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 28305; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 28306; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 28307; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28308; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 28309; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28310; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] 28311; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28312; AVX2-NEXT: retq 28313; 28314; AVX512VPOPCNTDQ-LABEL: ult_32_v2i64: 28315; AVX512VPOPCNTDQ: # %bb.0: 28316; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28317; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 28318; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] 28319; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28320; AVX512VPOPCNTDQ-NEXT: vzeroupper 28321; AVX512VPOPCNTDQ-NEXT: retq 28322; 28323; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64: 28324; AVX512VPOPCNTDQVL: # %bb.0: 28325; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 28326; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 28327; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28328; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28329; AVX512VPOPCNTDQVL-NEXT: retq 28330; 28331; BITALG_NOVLX-LABEL: ult_32_v2i64: 28332; BITALG_NOVLX: # %bb.0: 28333; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28334; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 28335; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 28336; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28337; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] 28338; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28339; BITALG_NOVLX-NEXT: vzeroupper 28340; BITALG_NOVLX-NEXT: retq 28341; 28342; BITALG-LABEL: ult_32_v2i64: 28343; BITALG: # %bb.0: 28344; BITALG-NEXT: vpopcntb %xmm0, %xmm0 28345; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 28346; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28347; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 28348; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28349; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28350; BITALG-NEXT: retq 28351 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 28352 %3 = icmp ult <2 x i64> %2, <i64 
32, i64 32> 28353 %4 = sext <2 x i1> %3 to <2 x i64> 28354 ret <2 x i64> %4 28355} 28356 28357define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { 28358; SSE2-LABEL: ugt_32_v2i64: 28359; SSE2: # %bb.0: 28360; SSE2-NEXT: movdqa %xmm0, %xmm1 28361; SSE2-NEXT: psrlw $1, %xmm1 28362; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28363; SSE2-NEXT: psubb %xmm1, %xmm0 28364; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28365; SSE2-NEXT: movdqa %xmm0, %xmm2 28366; SSE2-NEXT: pand %xmm1, %xmm2 28367; SSE2-NEXT: psrlw $2, %xmm0 28368; SSE2-NEXT: pand %xmm1, %xmm0 28369; SSE2-NEXT: paddb %xmm2, %xmm0 28370; SSE2-NEXT: movdqa %xmm0, %xmm1 28371; SSE2-NEXT: psrlw $4, %xmm1 28372; SSE2-NEXT: paddb %xmm0, %xmm1 28373; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28374; SSE2-NEXT: pxor %xmm0, %xmm0 28375; SSE2-NEXT: psadbw %xmm1, %xmm0 28376; SSE2-NEXT: por {{.*}}(%rip), %xmm0 28377; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28378; SSE2-NEXT: movdqa %xmm0, %xmm2 28379; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 28380; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28381; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 28382; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28383; SSE2-NEXT: pand %xmm3, %xmm1 28384; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28385; SSE2-NEXT: por %xmm1, %xmm0 28386; SSE2-NEXT: retq 28387; 28388; SSE3-LABEL: ugt_32_v2i64: 28389; SSE3: # %bb.0: 28390; SSE3-NEXT: movdqa %xmm0, %xmm1 28391; SSE3-NEXT: psrlw $1, %xmm1 28392; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28393; SSE3-NEXT: psubb %xmm1, %xmm0 28394; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28395; SSE3-NEXT: movdqa %xmm0, %xmm2 28396; SSE3-NEXT: pand %xmm1, %xmm2 28397; SSE3-NEXT: psrlw $2, %xmm0 28398; SSE3-NEXT: pand %xmm1, %xmm0 28399; SSE3-NEXT: paddb %xmm2, %xmm0 28400; SSE3-NEXT: movdqa %xmm0, %xmm1 28401; SSE3-NEXT: psrlw $4, %xmm1 28402; SSE3-NEXT: paddb %xmm0, %xmm1 28403; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28404; SSE3-NEXT: pxor %xmm0, 
%xmm0 28405; SSE3-NEXT: psadbw %xmm1, %xmm0 28406; SSE3-NEXT: por {{.*}}(%rip), %xmm0 28407; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28408; SSE3-NEXT: movdqa %xmm0, %xmm2 28409; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 28410; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28411; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 28412; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28413; SSE3-NEXT: pand %xmm3, %xmm1 28414; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28415; SSE3-NEXT: por %xmm1, %xmm0 28416; SSE3-NEXT: retq 28417; 28418; SSSE3-LABEL: ugt_32_v2i64: 28419; SSSE3: # %bb.0: 28420; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28421; SSSE3-NEXT: movdqa %xmm0, %xmm2 28422; SSSE3-NEXT: pand %xmm1, %xmm2 28423; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28424; SSSE3-NEXT: movdqa %xmm3, %xmm4 28425; SSSE3-NEXT: pshufb %xmm2, %xmm4 28426; SSSE3-NEXT: psrlw $4, %xmm0 28427; SSSE3-NEXT: pand %xmm1, %xmm0 28428; SSSE3-NEXT: pshufb %xmm0, %xmm3 28429; SSSE3-NEXT: paddb %xmm4, %xmm3 28430; SSSE3-NEXT: pxor %xmm0, %xmm0 28431; SSSE3-NEXT: psadbw %xmm3, %xmm0 28432; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 28433; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28434; SSSE3-NEXT: movdqa %xmm0, %xmm2 28435; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 28436; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28437; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 28438; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28439; SSSE3-NEXT: pand %xmm3, %xmm1 28440; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28441; SSSE3-NEXT: por %xmm1, %xmm0 28442; SSSE3-NEXT: retq 28443; 28444; SSE41-LABEL: ugt_32_v2i64: 28445; SSE41: # %bb.0: 28446; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28447; SSE41-NEXT: movdqa %xmm0, %xmm2 28448; SSE41-NEXT: pand %xmm1, %xmm2 28449; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28450; SSE41-NEXT: movdqa %xmm3, %xmm4 28451; SSE41-NEXT: 
pshufb %xmm2, %xmm4 28452; SSE41-NEXT: psrlw $4, %xmm0 28453; SSE41-NEXT: pand %xmm1, %xmm0 28454; SSE41-NEXT: pshufb %xmm0, %xmm3 28455; SSE41-NEXT: paddb %xmm4, %xmm3 28456; SSE41-NEXT: pxor %xmm0, %xmm0 28457; SSE41-NEXT: psadbw %xmm3, %xmm0 28458; SSE41-NEXT: por {{.*}}(%rip), %xmm0 28459; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] 28460; SSE41-NEXT: movdqa %xmm0, %xmm2 28461; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 28462; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28463; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 28464; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28465; SSE41-NEXT: pand %xmm3, %xmm1 28466; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28467; SSE41-NEXT: por %xmm1, %xmm0 28468; SSE41-NEXT: retq 28469; 28470; AVX1-LABEL: ugt_32_v2i64: 28471; AVX1: # %bb.0: 28472; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28473; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 28474; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28475; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28476; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 28477; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 28478; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28479; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28480; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 28481; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28482; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28483; AVX1-NEXT: retq 28484; 28485; AVX2-LABEL: ugt_32_v2i64: 28486; AVX2: # %bb.0: 28487; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28488; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 28489; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28490; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28491; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 28492; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 28493; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28494; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28495; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 28496; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28497; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28498; AVX2-NEXT: retq 28499; 28500; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64: 28501; AVX512VPOPCNTDQ: # %bb.0: 28502; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28503; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 28504; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28505; AVX512VPOPCNTDQ-NEXT: vzeroupper 28506; AVX512VPOPCNTDQ-NEXT: retq 28507; 28508; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64: 28509; AVX512VPOPCNTDQVL: # %bb.0: 28510; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 28511; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 28512; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28513; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28514; AVX512VPOPCNTDQVL-NEXT: retq 28515; 28516; BITALG_NOVLX-LABEL: ugt_32_v2i64: 28517; BITALG_NOVLX: # %bb.0: 28518; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28519; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 28520; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 28521; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28522; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28523; BITALG_NOVLX-NEXT: vzeroupper 28524; BITALG_NOVLX-NEXT: retq 28525; 28526; BITALG-LABEL: ugt_32_v2i64: 28527; BITALG: # %bb.0: 28528; BITALG-NEXT: vpopcntb %xmm0, %xmm0 28529; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 28530; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28531; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 28532; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28533; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28534; BITALG-NEXT: retq 28535 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 28536 %3 = icmp ugt <2 x i64> %2, <i64 32, i64 32> 28537 %4 = sext <2 x i1> %3 to <2 x i64> 28538 ret <2 x i64> %4 28539} 28540 28541define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { 28542; SSE2-LABEL: ult_33_v2i64: 28543; SSE2: # %bb.0: 28544; SSE2-NEXT: movdqa %xmm0, %xmm1 28545; SSE2-NEXT: psrlw $1, %xmm1 28546; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 28547; SSE2-NEXT: psubb %xmm1, %xmm0 28548; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28549; SSE2-NEXT: movdqa %xmm0, %xmm2 28550; SSE2-NEXT: pand %xmm1, %xmm2 28551; SSE2-NEXT: psrlw $2, %xmm0 28552; SSE2-NEXT: pand %xmm1, %xmm0 28553; SSE2-NEXT: paddb %xmm2, %xmm0 28554; SSE2-NEXT: movdqa %xmm0, %xmm1 28555; SSE2-NEXT: psrlw $4, %xmm1 28556; SSE2-NEXT: paddb %xmm0, %xmm1 28557; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28558; SSE2-NEXT: pxor %xmm0, %xmm0 28559; SSE2-NEXT: psadbw %xmm1, %xmm0 28560; SSE2-NEXT: por {{.*}}(%rip), %xmm0 28561; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28562; SSE2-NEXT: movdqa %xmm1, %xmm2 28563; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 28564; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28565; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 28566; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28567; SSE2-NEXT: pand %xmm3, %xmm1 28568; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28569; SSE2-NEXT: por %xmm1, %xmm0 28570; SSE2-NEXT: retq 28571; 28572; SSE3-LABEL: ult_33_v2i64: 28573; SSE3: # %bb.0: 28574; SSE3-NEXT: movdqa %xmm0, %xmm1 28575; SSE3-NEXT: psrlw $1, %xmm1 28576; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28577; SSE3-NEXT: psubb %xmm1, %xmm0 28578; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28579; SSE3-NEXT: movdqa %xmm0, %xmm2 28580; SSE3-NEXT: pand %xmm1, %xmm2 28581; SSE3-NEXT: psrlw $2, %xmm0 28582; SSE3-NEXT: pand %xmm1, %xmm0 28583; SSE3-NEXT: paddb %xmm2, %xmm0 28584; SSE3-NEXT: movdqa %xmm0, %xmm1 28585; SSE3-NEXT: psrlw $4, %xmm1 28586; SSE3-NEXT: paddb %xmm0, %xmm1 28587; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28588; SSE3-NEXT: pxor %xmm0, %xmm0 28589; SSE3-NEXT: psadbw %xmm1, %xmm0 28590; SSE3-NEXT: por {{.*}}(%rip), %xmm0 28591; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28592; SSE3-NEXT: movdqa %xmm1, %xmm2 28593; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 28594; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28595; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 28596; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28597; SSE3-NEXT: pand %xmm3, %xmm1 28598; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28599; SSE3-NEXT: por %xmm1, %xmm0 28600; SSE3-NEXT: retq 28601; 28602; SSSE3-LABEL: ult_33_v2i64: 28603; SSSE3: # %bb.0: 28604; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28605; SSSE3-NEXT: movdqa %xmm0, %xmm2 28606; SSSE3-NEXT: pand %xmm1, %xmm2 28607; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28608; SSSE3-NEXT: movdqa %xmm3, %xmm4 28609; SSSE3-NEXT: pshufb %xmm2, %xmm4 28610; SSSE3-NEXT: psrlw $4, %xmm0 28611; SSSE3-NEXT: pand %xmm1, %xmm0 28612; SSSE3-NEXT: pshufb %xmm0, %xmm3 28613; SSSE3-NEXT: paddb %xmm4, %xmm3 28614; SSSE3-NEXT: pxor %xmm0, %xmm0 28615; SSSE3-NEXT: psadbw %xmm3, %xmm0 28616; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 28617; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28618; SSSE3-NEXT: movdqa %xmm1, %xmm2 28619; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 28620; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28621; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 28622; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28623; SSSE3-NEXT: pand %xmm3, %xmm1 28624; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28625; SSSE3-NEXT: por %xmm1, %xmm0 28626; SSSE3-NEXT: retq 28627; 28628; SSE41-LABEL: ult_33_v2i64: 28629; SSE41: # %bb.0: 28630; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28631; SSE41-NEXT: movdqa %xmm0, %xmm2 28632; SSE41-NEXT: pand %xmm1, %xmm2 28633; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28634; SSE41-NEXT: movdqa %xmm3, %xmm4 28635; SSE41-NEXT: pshufb %xmm2, %xmm4 28636; SSE41-NEXT: psrlw $4, %xmm0 28637; SSE41-NEXT: pand %xmm1, %xmm0 28638; SSE41-NEXT: pshufb %xmm0, %xmm3 28639; SSE41-NEXT: paddb %xmm4, %xmm3 28640; SSE41-NEXT: pxor %xmm0, %xmm0 28641; SSE41-NEXT: psadbw %xmm3, %xmm0 28642; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
28643; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28644; SSE41-NEXT: movdqa %xmm1, %xmm2 28645; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 28646; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28647; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 28648; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28649; SSE41-NEXT: pand %xmm3, %xmm1 28650; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28651; SSE41-NEXT: por %xmm1, %xmm0 28652; SSE41-NEXT: retq 28653; 28654; AVX1-LABEL: ult_33_v2i64: 28655; AVX1: # %bb.0: 28656; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28657; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 28658; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28659; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28660; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 28661; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 28662; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28663; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28664; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 28665; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28666; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] 28667; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28668; AVX1-NEXT: retq 28669; 28670; AVX2-LABEL: ult_33_v2i64: 28671; AVX2: # %bb.0: 28672; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28673; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 28674; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28675; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28676; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 28677; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 28678; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28679; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28680; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 28681; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28682; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] 28683; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28684; AVX2-NEXT: retq 28685; 28686; AVX512VPOPCNTDQ-LABEL: ult_33_v2i64: 28687; AVX512VPOPCNTDQ: # %bb.0: 28688; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 28689; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 28690; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] 28691; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28692; AVX512VPOPCNTDQ-NEXT: vzeroupper 28693; AVX512VPOPCNTDQ-NEXT: retq 28694; 28695; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64: 28696; AVX512VPOPCNTDQVL: # %bb.0: 28697; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 28698; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 28699; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28700; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28701; AVX512VPOPCNTDQVL-NEXT: retq 28702; 28703; BITALG_NOVLX-LABEL: ult_33_v2i64: 28704; BITALG_NOVLX: # %bb.0: 28705; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28706; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 28707; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 28708; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28709; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] 28710; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 28711; BITALG_NOVLX-NEXT: vzeroupper 28712; BITALG_NOVLX-NEXT: retq 28713; 28714; BITALG-LABEL: ult_33_v2i64: 28715; BITALG: # %bb.0: 28716; BITALG-NEXT: vpopcntb %xmm0, %xmm0 28717; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 28718; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28719; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 28720; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28721; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28722; BITALG-NEXT: retq 28723 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 28724 %3 = icmp ult <2 x i64> %2, <i64 33, i64 33> 28725 %4 = sext <2 x i1> %3 to <2 x i64> 28726 ret <2 x i64> %4 28727} 28728 28729define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { 28730; SSE2-LABEL: ugt_33_v2i64: 28731; SSE2: # %bb.0: 28732; SSE2-NEXT: movdqa %xmm0, %xmm1 28733; SSE2-NEXT: psrlw $1, %xmm1 28734; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28735; SSE2-NEXT: psubb %xmm1, %xmm0 28736; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28737; SSE2-NEXT: movdqa %xmm0, %xmm2 28738; SSE2-NEXT: pand %xmm1, %xmm2 28739; SSE2-NEXT: psrlw $2, %xmm0 28740; SSE2-NEXT: pand %xmm1, %xmm0 28741; SSE2-NEXT: paddb %xmm2, %xmm0 28742; SSE2-NEXT: movdqa %xmm0, %xmm1 28743; SSE2-NEXT: psrlw $4, %xmm1 28744; SSE2-NEXT: paddb %xmm0, %xmm1 28745; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28746; SSE2-NEXT: pxor %xmm0, %xmm0 28747; SSE2-NEXT: psadbw %xmm1, %xmm0 28748; SSE2-NEXT: por {{.*}}(%rip), %xmm0 28749; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28750; SSE2-NEXT: movdqa %xmm0, %xmm2 28751; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 28752; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28753; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 28754; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28755; SSE2-NEXT: pand %xmm3, %xmm1 28756; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28757; SSE2-NEXT: por %xmm1, %xmm0 28758; SSE2-NEXT: retq 28759; 28760; SSE3-LABEL: ugt_33_v2i64: 28761; SSE3: # %bb.0: 28762; SSE3-NEXT: movdqa %xmm0, %xmm1 28763; SSE3-NEXT: psrlw $1, %xmm1 28764; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28765; SSE3-NEXT: psubb %xmm1, %xmm0 28766; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28767; SSE3-NEXT: movdqa %xmm0, %xmm2 28768; SSE3-NEXT: pand %xmm1, %xmm2 28769; SSE3-NEXT: psrlw $2, %xmm0 28770; SSE3-NEXT: pand %xmm1, %xmm0 28771; SSE3-NEXT: paddb %xmm2, %xmm0 28772; SSE3-NEXT: movdqa %xmm0, %xmm1 28773; SSE3-NEXT: psrlw $4, %xmm1 28774; SSE3-NEXT: paddb %xmm0, %xmm1 28775; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28776; SSE3-NEXT: pxor %xmm0, %xmm0 28777; SSE3-NEXT: psadbw %xmm1, %xmm0 28778; SSE3-NEXT: por {{.*}}(%rip), %xmm0 28779; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28780; SSE3-NEXT: movdqa %xmm0, %xmm2 28781; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 28782; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28783; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 28784; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28785; 
SSE3-NEXT: pand %xmm3, %xmm1 28786; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28787; SSE3-NEXT: por %xmm1, %xmm0 28788; SSE3-NEXT: retq 28789; 28790; SSSE3-LABEL: ugt_33_v2i64: 28791; SSSE3: # %bb.0: 28792; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28793; SSSE3-NEXT: movdqa %xmm0, %xmm2 28794; SSSE3-NEXT: pand %xmm1, %xmm2 28795; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28796; SSSE3-NEXT: movdqa %xmm3, %xmm4 28797; SSSE3-NEXT: pshufb %xmm2, %xmm4 28798; SSSE3-NEXT: psrlw $4, %xmm0 28799; SSSE3-NEXT: pand %xmm1, %xmm0 28800; SSSE3-NEXT: pshufb %xmm0, %xmm3 28801; SSSE3-NEXT: paddb %xmm4, %xmm3 28802; SSSE3-NEXT: pxor %xmm0, %xmm0 28803; SSSE3-NEXT: psadbw %xmm3, %xmm0 28804; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 28805; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28806; SSSE3-NEXT: movdqa %xmm0, %xmm2 28807; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 28808; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28809; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 28810; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28811; SSSE3-NEXT: pand %xmm3, %xmm1 28812; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28813; SSSE3-NEXT: por %xmm1, %xmm0 28814; SSSE3-NEXT: retq 28815; 28816; SSE41-LABEL: ugt_33_v2i64: 28817; SSE41: # %bb.0: 28818; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28819; SSE41-NEXT: movdqa %xmm0, %xmm2 28820; SSE41-NEXT: pand %xmm1, %xmm2 28821; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28822; SSE41-NEXT: movdqa %xmm3, %xmm4 28823; SSE41-NEXT: pshufb %xmm2, %xmm4 28824; SSE41-NEXT: psrlw $4, %xmm0 28825; SSE41-NEXT: pand %xmm1, %xmm0 28826; SSE41-NEXT: pshufb %xmm0, %xmm3 28827; SSE41-NEXT: paddb %xmm4, %xmm3 28828; SSE41-NEXT: pxor %xmm0, %xmm0 28829; SSE41-NEXT: psadbw %xmm3, %xmm0 28830; SSE41-NEXT: por {{.*}}(%rip), %xmm0 28831; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] 28832; SSE41-NEXT: movdqa %xmm0, 
%xmm2 28833; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 28834; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28835; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 28836; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28837; SSE41-NEXT: pand %xmm3, %xmm1 28838; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28839; SSE41-NEXT: por %xmm1, %xmm0 28840; SSE41-NEXT: retq 28841; 28842; AVX1-LABEL: ugt_33_v2i64: 28843; AVX1: # %bb.0: 28844; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28845; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 28846; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28847; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28848; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 28849; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 28850; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28851; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28852; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 28853; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28854; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28855; AVX1-NEXT: retq 28856; 28857; AVX2-LABEL: ugt_33_v2i64: 28858; AVX2: # %bb.0: 28859; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28860; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 28861; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28862; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 28863; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 28864; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 28865; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 28866; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 28867; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 28868; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28869; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28870; AVX2-NEXT: retq 28871; 28872; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64: 28873; AVX512VPOPCNTDQ: # %bb.0: 28874; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28875; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 28876; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28877; AVX512VPOPCNTDQ-NEXT: vzeroupper 28878; 
AVX512VPOPCNTDQ-NEXT: retq 28879; 28880; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64: 28881; AVX512VPOPCNTDQVL: # %bb.0: 28882; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 28883; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 28884; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28885; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28886; AVX512VPOPCNTDQVL-NEXT: retq 28887; 28888; BITALG_NOVLX-LABEL: ugt_33_v2i64: 28889; BITALG_NOVLX: # %bb.0: 28890; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 28891; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 28892; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 28893; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28894; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 28895; BITALG_NOVLX-NEXT: vzeroupper 28896; BITALG_NOVLX-NEXT: retq 28897; 28898; BITALG-LABEL: ugt_33_v2i64: 28899; BITALG: # %bb.0: 28900; BITALG-NEXT: vpopcntb %xmm0, %xmm0 28901; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 28902; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 28903; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 28904; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 28905; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 28906; BITALG-NEXT: retq 28907 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 28908 %3 = icmp ugt <2 x i64> %2, <i64 33, i64 33> 28909 %4 = sext <2 x i1> %3 to <2 x i64> 28910 ret <2 x i64> %4 28911} 28912 28913define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { 28914; SSE2-LABEL: ult_34_v2i64: 28915; SSE2: # %bb.0: 28916; SSE2-NEXT: movdqa %xmm0, %xmm1 28917; SSE2-NEXT: psrlw $1, %xmm1 28918; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28919; SSE2-NEXT: psubb %xmm1, %xmm0 28920; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28921; SSE2-NEXT: movdqa %xmm0, %xmm2 28922; SSE2-NEXT: pand %xmm1, %xmm2 28923; SSE2-NEXT: psrlw $2, %xmm0 28924; SSE2-NEXT: pand %xmm1, %xmm0 28925; SSE2-NEXT: paddb %xmm2, %xmm0 28926; SSE2-NEXT: movdqa %xmm0, %xmm1 28927; SSE2-NEXT: psrlw $4, 
%xmm1 28928; SSE2-NEXT: paddb %xmm0, %xmm1 28929; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 28930; SSE2-NEXT: pxor %xmm0, %xmm0 28931; SSE2-NEXT: psadbw %xmm1, %xmm0 28932; SSE2-NEXT: por {{.*}}(%rip), %xmm0 28933; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 28934; SSE2-NEXT: movdqa %xmm1, %xmm2 28935; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 28936; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28937; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 28938; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28939; SSE2-NEXT: pand %xmm3, %xmm1 28940; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28941; SSE2-NEXT: por %xmm1, %xmm0 28942; SSE2-NEXT: retq 28943; 28944; SSE3-LABEL: ult_34_v2i64: 28945; SSE3: # %bb.0: 28946; SSE3-NEXT: movdqa %xmm0, %xmm1 28947; SSE3-NEXT: psrlw $1, %xmm1 28948; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28949; SSE3-NEXT: psubb %xmm1, %xmm0 28950; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 28951; SSE3-NEXT: movdqa %xmm0, %xmm2 28952; SSE3-NEXT: pand %xmm1, %xmm2 28953; SSE3-NEXT: psrlw $2, %xmm0 28954; SSE3-NEXT: pand %xmm1, %xmm0 28955; SSE3-NEXT: paddb %xmm2, %xmm0 28956; SSE3-NEXT: movdqa %xmm0, %xmm1 28957; SSE3-NEXT: psrlw $4, %xmm1 28958; SSE3-NEXT: paddb %xmm0, %xmm1 28959; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 28960; SSE3-NEXT: pxor %xmm0, %xmm0 28961; SSE3-NEXT: psadbw %xmm1, %xmm0 28962; SSE3-NEXT: por {{.*}}(%rip), %xmm0 28963; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 28964; SSE3-NEXT: movdqa %xmm1, %xmm2 28965; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 28966; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28967; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 28968; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28969; SSE3-NEXT: pand %xmm3, %xmm1 28970; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28971; SSE3-NEXT: por %xmm1, %xmm0 28972; SSE3-NEXT: retq 28973; 28974; SSSE3-LABEL: ult_34_v2i64: 28975; SSSE3: # %bb.0: 28976; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 28977; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 28978; SSSE3-NEXT: pand %xmm1, %xmm2 28979; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 28980; SSSE3-NEXT: movdqa %xmm3, %xmm4 28981; SSSE3-NEXT: pshufb %xmm2, %xmm4 28982; SSSE3-NEXT: psrlw $4, %xmm0 28983; SSSE3-NEXT: pand %xmm1, %xmm0 28984; SSSE3-NEXT: pshufb %xmm0, %xmm3 28985; SSSE3-NEXT: paddb %xmm4, %xmm3 28986; SSSE3-NEXT: pxor %xmm0, %xmm0 28987; SSSE3-NEXT: psadbw %xmm3, %xmm0 28988; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 28989; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 28990; SSSE3-NEXT: movdqa %xmm1, %xmm2 28991; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 28992; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 28993; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 28994; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 28995; SSSE3-NEXT: pand %xmm3, %xmm1 28996; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 28997; SSSE3-NEXT: por %xmm1, %xmm0 28998; SSSE3-NEXT: retq 28999; 29000; SSE41-LABEL: ult_34_v2i64: 29001; SSE41: # %bb.0: 29002; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29003; SSE41-NEXT: movdqa %xmm0, %xmm2 29004; SSE41-NEXT: pand %xmm1, %xmm2 29005; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29006; SSE41-NEXT: movdqa %xmm3, %xmm4 29007; SSE41-NEXT: pshufb %xmm2, %xmm4 29008; SSE41-NEXT: psrlw $4, %xmm0 29009; SSE41-NEXT: pand %xmm1, %xmm0 29010; SSE41-NEXT: pshufb %xmm0, %xmm3 29011; SSE41-NEXT: paddb %xmm4, %xmm3 29012; SSE41-NEXT: pxor %xmm0, %xmm0 29013; SSE41-NEXT: psadbw %xmm3, %xmm0 29014; SSE41-NEXT: por {{.*}}(%rip), %xmm0 29015; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 29016; SSE41-NEXT: movdqa %xmm1, %xmm2 29017; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 29018; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29019; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 29020; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29021; SSE41-NEXT: pand %xmm3, %xmm1 29022; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29023; 
SSE41-NEXT: por %xmm1, %xmm0 29024; SSE41-NEXT: retq 29025; 29026; AVX1-LABEL: ult_34_v2i64: 29027; AVX1: # %bb.0: 29028; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29029; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 29030; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29031; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29032; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 29033; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 29034; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29035; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29036; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 29037; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29038; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] 29039; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29040; AVX1-NEXT: retq 29041; 29042; AVX2-LABEL: ult_34_v2i64: 29043; AVX2: # %bb.0: 29044; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29045; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 29046; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29047; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29048; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 29049; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 29050; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29051; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29052; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 29053; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29054; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] 29055; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29056; AVX2-NEXT: retq 29057; 29058; AVX512VPOPCNTDQ-LABEL: ult_34_v2i64: 29059; AVX512VPOPCNTDQ: # %bb.0: 29060; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29061; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 29062; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] 29063; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29064; AVX512VPOPCNTDQ-NEXT: vzeroupper 29065; AVX512VPOPCNTDQ-NEXT: retq 29066; 29067; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64: 29068; AVX512VPOPCNTDQVL: # %bb.0: 29069; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 29070; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 29071; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29072; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29073; AVX512VPOPCNTDQVL-NEXT: retq 29074; 29075; BITALG_NOVLX-LABEL: ult_34_v2i64: 29076; BITALG_NOVLX: # %bb.0: 29077; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29078; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 29079; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 29080; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29081; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] 29082; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29083; BITALG_NOVLX-NEXT: vzeroupper 29084; BITALG_NOVLX-NEXT: retq 29085; 29086; BITALG-LABEL: ult_34_v2i64: 29087; BITALG: # %bb.0: 29088; BITALG-NEXT: vpopcntb %xmm0, %xmm0 29089; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 29090; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29091; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 29092; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29093; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29094; BITALG-NEXT: retq 29095 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 29096 %3 = icmp ult <2 x i64> %2, <i64 34, i64 34> 29097 %4 = sext <2 x i1> %3 to <2 x i64> 29098 ret <2 x i64> %4 29099} 29100 29101define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { 29102; SSE2-LABEL: ugt_34_v2i64: 29103; SSE2: # %bb.0: 29104; SSE2-NEXT: movdqa %xmm0, %xmm1 29105; SSE2-NEXT: psrlw $1, %xmm1 29106; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29107; SSE2-NEXT: psubb %xmm1, %xmm0 29108; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29109; SSE2-NEXT: movdqa %xmm0, %xmm2 29110; SSE2-NEXT: pand %xmm1, %xmm2 29111; SSE2-NEXT: psrlw $2, %xmm0 29112; SSE2-NEXT: pand %xmm1, %xmm0 29113; SSE2-NEXT: paddb %xmm2, %xmm0 29114; SSE2-NEXT: movdqa %xmm0, %xmm1 29115; SSE2-NEXT: psrlw $4, %xmm1 29116; SSE2-NEXT: paddb %xmm0, %xmm1 29117; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29118; SSE2-NEXT: pxor %xmm0, 
%xmm0 29119; SSE2-NEXT: psadbw %xmm1, %xmm0 29120; SSE2-NEXT: por {{.*}}(%rip), %xmm0 29121; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 29122; SSE2-NEXT: movdqa %xmm0, %xmm2 29123; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 29124; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29125; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 29126; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29127; SSE2-NEXT: pand %xmm3, %xmm1 29128; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29129; SSE2-NEXT: por %xmm1, %xmm0 29130; SSE2-NEXT: retq 29131; 29132; SSE3-LABEL: ugt_34_v2i64: 29133; SSE3: # %bb.0: 29134; SSE3-NEXT: movdqa %xmm0, %xmm1 29135; SSE3-NEXT: psrlw $1, %xmm1 29136; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29137; SSE3-NEXT: psubb %xmm1, %xmm0 29138; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29139; SSE3-NEXT: movdqa %xmm0, %xmm2 29140; SSE3-NEXT: pand %xmm1, %xmm2 29141; SSE3-NEXT: psrlw $2, %xmm0 29142; SSE3-NEXT: pand %xmm1, %xmm0 29143; SSE3-NEXT: paddb %xmm2, %xmm0 29144; SSE3-NEXT: movdqa %xmm0, %xmm1 29145; SSE3-NEXT: psrlw $4, %xmm1 29146; SSE3-NEXT: paddb %xmm0, %xmm1 29147; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29148; SSE3-NEXT: pxor %xmm0, %xmm0 29149; SSE3-NEXT: psadbw %xmm1, %xmm0 29150; SSE3-NEXT: por {{.*}}(%rip), %xmm0 29151; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 29152; SSE3-NEXT: movdqa %xmm0, %xmm2 29153; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 29154; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29155; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 29156; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29157; SSE3-NEXT: pand %xmm3, %xmm1 29158; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29159; SSE3-NEXT: por %xmm1, %xmm0 29160; SSE3-NEXT: retq 29161; 29162; SSSE3-LABEL: ugt_34_v2i64: 29163; SSSE3: # %bb.0: 29164; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29165; SSSE3-NEXT: movdqa %xmm0, %xmm2 29166; SSSE3-NEXT: pand %xmm1, %xmm2 29167; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29168; SSSE3-NEXT: movdqa %xmm3, %xmm4 29169; SSSE3-NEXT: pshufb %xmm2, %xmm4 29170; SSSE3-NEXT: psrlw $4, %xmm0 29171; SSSE3-NEXT: pand %xmm1, %xmm0 29172; SSSE3-NEXT: pshufb %xmm0, %xmm3 29173; SSSE3-NEXT: paddb %xmm4, %xmm3 29174; SSSE3-NEXT: pxor %xmm0, %xmm0 29175; SSSE3-NEXT: psadbw %xmm3, %xmm0 29176; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 29177; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 29178; SSSE3-NEXT: movdqa %xmm0, %xmm2 29179; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 29180; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29181; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 29182; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29183; SSSE3-NEXT: pand %xmm3, %xmm1 29184; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29185; SSSE3-NEXT: por %xmm1, %xmm0 29186; SSSE3-NEXT: retq 29187; 29188; SSE41-LABEL: ugt_34_v2i64: 29189; SSE41: # %bb.0: 29190; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29191; SSE41-NEXT: movdqa %xmm0, %xmm2 29192; SSE41-NEXT: pand %xmm1, %xmm2 29193; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29194; SSE41-NEXT: movdqa %xmm3, %xmm4 29195; SSE41-NEXT: pshufb %xmm2, %xmm4 29196; SSE41-NEXT: psrlw $4, %xmm0 29197; SSE41-NEXT: pand %xmm1, %xmm0 29198; SSE41-NEXT: pshufb %xmm0, %xmm3 29199; SSE41-NEXT: paddb %xmm4, %xmm3 29200; SSE41-NEXT: pxor %xmm0, %xmm0 29201; SSE41-NEXT: psadbw %xmm3, %xmm0 29202; SSE41-NEXT: por {{.*}}(%rip), %xmm0 29203; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] 29204; SSE41-NEXT: movdqa %xmm0, %xmm2 29205; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 29206; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29207; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 29208; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29209; SSE41-NEXT: pand %xmm3, %xmm1 29210; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29211; SSE41-NEXT: por %xmm1, %xmm0 29212; SSE41-NEXT: retq 29213; 29214; AVX1-LABEL: ugt_34_v2i64: 29215; AVX1: # %bb.0: 
29216; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29217; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 29218; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29219; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29220; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 29221; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 29222; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29223; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29224; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 29225; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29226; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29227; AVX1-NEXT: retq 29228; 29229; AVX2-LABEL: ugt_34_v2i64: 29230; AVX2: # %bb.0: 29231; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29232; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 29233; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29234; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29235; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 29236; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 29237; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29238; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29239; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 29240; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29241; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29242; AVX2-NEXT: retq 29243; 29244; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64: 29245; AVX512VPOPCNTDQ: # %bb.0: 29246; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29247; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 29248; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29249; AVX512VPOPCNTDQ-NEXT: vzeroupper 29250; AVX512VPOPCNTDQ-NEXT: retq 29251; 29252; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64: 29253; AVX512VPOPCNTDQVL: # %bb.0: 29254; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 29255; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 29256; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29257; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29258; AVX512VPOPCNTDQVL-NEXT: retq 29259; 29260; 
BITALG_NOVLX-LABEL: ugt_34_v2i64: 29261; BITALG_NOVLX: # %bb.0: 29262; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29263; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 29264; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 29265; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29266; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29267; BITALG_NOVLX-NEXT: vzeroupper 29268; BITALG_NOVLX-NEXT: retq 29269; 29270; BITALG-LABEL: ugt_34_v2i64: 29271; BITALG: # %bb.0: 29272; BITALG-NEXT: vpopcntb %xmm0, %xmm0 29273; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 29274; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29275; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 29276; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29277; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29278; BITALG-NEXT: retq 29279 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 29280 %3 = icmp ugt <2 x i64> %2, <i64 34, i64 34> 29281 %4 = sext <2 x i1> %3 to <2 x i64> 29282 ret <2 x i64> %4 29283} 29284 29285define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { 29286; SSE2-LABEL: ult_35_v2i64: 29287; SSE2: # %bb.0: 29288; SSE2-NEXT: movdqa %xmm0, %xmm1 29289; SSE2-NEXT: psrlw $1, %xmm1 29290; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29291; SSE2-NEXT: psubb %xmm1, %xmm0 29292; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29293; SSE2-NEXT: movdqa %xmm0, %xmm2 29294; SSE2-NEXT: pand %xmm1, %xmm2 29295; SSE2-NEXT: psrlw $2, %xmm0 29296; SSE2-NEXT: pand %xmm1, %xmm0 29297; SSE2-NEXT: paddb %xmm2, %xmm0 29298; SSE2-NEXT: movdqa %xmm0, %xmm1 29299; SSE2-NEXT: psrlw $4, %xmm1 29300; SSE2-NEXT: paddb %xmm0, %xmm1 29301; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29302; SSE2-NEXT: pxor %xmm0, %xmm0 29303; SSE2-NEXT: psadbw %xmm1, %xmm0 29304; SSE2-NEXT: por {{.*}}(%rip), %xmm0 29305; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] 29306; SSE2-NEXT: movdqa %xmm1, %xmm2 29307; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 29308; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29309; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 29310; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29311; SSE2-NEXT: pand %xmm3, %xmm1 29312; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29313; SSE2-NEXT: por %xmm1, %xmm0 29314; SSE2-NEXT: retq 29315; 29316; SSE3-LABEL: ult_35_v2i64: 29317; SSE3: # %bb.0: 29318; SSE3-NEXT: movdqa %xmm0, %xmm1 29319; SSE3-NEXT: psrlw $1, %xmm1 29320; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29321; SSE3-NEXT: psubb %xmm1, %xmm0 29322; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29323; SSE3-NEXT: movdqa %xmm0, %xmm2 29324; SSE3-NEXT: pand %xmm1, %xmm2 29325; SSE3-NEXT: psrlw $2, %xmm0 29326; SSE3-NEXT: pand %xmm1, %xmm0 29327; SSE3-NEXT: paddb %xmm2, %xmm0 29328; SSE3-NEXT: movdqa %xmm0, %xmm1 29329; SSE3-NEXT: psrlw $4, %xmm1 29330; SSE3-NEXT: paddb %xmm0, %xmm1 29331; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29332; SSE3-NEXT: pxor %xmm0, %xmm0 29333; SSE3-NEXT: psadbw %xmm1, %xmm0 29334; SSE3-NEXT: por {{.*}}(%rip), %xmm0 29335; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] 29336; SSE3-NEXT: movdqa %xmm1, %xmm2 29337; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 29338; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29339; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 29340; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29341; SSE3-NEXT: pand %xmm3, %xmm1 29342; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29343; SSE3-NEXT: por %xmm1, %xmm0 29344; SSE3-NEXT: retq 29345; 29346; SSSE3-LABEL: ult_35_v2i64: 29347; SSSE3: # %bb.0: 29348; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29349; SSSE3-NEXT: movdqa %xmm0, %xmm2 29350; SSSE3-NEXT: pand %xmm1, %xmm2 29351; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29352; SSSE3-NEXT: movdqa %xmm3, %xmm4 29353; SSSE3-NEXT: pshufb %xmm2, %xmm4 29354; SSSE3-NEXT: psrlw $4, %xmm0 29355; SSSE3-NEXT: pand %xmm1, %xmm0 29356; SSSE3-NEXT: pshufb %xmm0, %xmm3 29357; SSSE3-NEXT: paddb %xmm4, %xmm3 29358; SSSE3-NEXT: pxor %xmm0, 
%xmm0 29359; SSSE3-NEXT: psadbw %xmm3, %xmm0 29360; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 29361; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] 29362; SSSE3-NEXT: movdqa %xmm1, %xmm2 29363; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 29364; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29365; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 29366; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29367; SSSE3-NEXT: pand %xmm3, %xmm1 29368; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29369; SSSE3-NEXT: por %xmm1, %xmm0 29370; SSSE3-NEXT: retq 29371; 29372; SSE41-LABEL: ult_35_v2i64: 29373; SSE41: # %bb.0: 29374; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29375; SSE41-NEXT: movdqa %xmm0, %xmm2 29376; SSE41-NEXT: pand %xmm1, %xmm2 29377; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29378; SSE41-NEXT: movdqa %xmm3, %xmm4 29379; SSE41-NEXT: pshufb %xmm2, %xmm4 29380; SSE41-NEXT: psrlw $4, %xmm0 29381; SSE41-NEXT: pand %xmm1, %xmm0 29382; SSE41-NEXT: pshufb %xmm0, %xmm3 29383; SSE41-NEXT: paddb %xmm4, %xmm3 29384; SSE41-NEXT: pxor %xmm0, %xmm0 29385; SSE41-NEXT: psadbw %xmm3, %xmm0 29386; SSE41-NEXT: por {{.*}}(%rip), %xmm0 29387; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] 29388; SSE41-NEXT: movdqa %xmm1, %xmm2 29389; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 29390; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29391; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 29392; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29393; SSE41-NEXT: pand %xmm3, %xmm1 29394; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29395; SSE41-NEXT: por %xmm1, %xmm0 29396; SSE41-NEXT: retq 29397; 29398; AVX1-LABEL: ult_35_v2i64: 29399; AVX1: # %bb.0: 29400; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29401; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 29402; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29403; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29404; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 29405; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 29406; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29407; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29408; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 29409; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29410; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] 29411; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29412; AVX1-NEXT: retq 29413; 29414; AVX2-LABEL: ult_35_v2i64: 29415; AVX2: # %bb.0: 29416; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29417; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 29418; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29419; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29420; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 29421; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 29422; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29423; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29424; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 29425; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29426; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] 29427; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29428; AVX2-NEXT: retq 29429; 29430; AVX512VPOPCNTDQ-LABEL: ult_35_v2i64: 29431; AVX512VPOPCNTDQ: # %bb.0: 29432; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29433; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 29434; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] 29435; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29436; AVX512VPOPCNTDQ-NEXT: vzeroupper 29437; AVX512VPOPCNTDQ-NEXT: retq 29438; 29439; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64: 29440; AVX512VPOPCNTDQVL: # %bb.0: 29441; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 29442; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 29443; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29444; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29445; AVX512VPOPCNTDQVL-NEXT: retq 29446; 29447; BITALG_NOVLX-LABEL: ult_35_v2i64: 29448; BITALG_NOVLX: # %bb.0: 29449; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29450; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 29451; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 29452; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29453; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] 29454; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29455; BITALG_NOVLX-NEXT: vzeroupper 29456; BITALG_NOVLX-NEXT: retq 29457; 29458; BITALG-LABEL: ult_35_v2i64: 29459; BITALG: # %bb.0: 29460; BITALG-NEXT: vpopcntb %xmm0, %xmm0 29461; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 29462; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29463; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 29464; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29465; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29466; BITALG-NEXT: retq 29467 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 29468 %3 = icmp ult <2 x i64> %2, <i64 35, i64 35> 29469 %4 = sext <2 x i1> %3 to <2 x i64> 29470 ret <2 x i64> %4 29471} 29472 29473define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { 29474; SSE2-LABEL: ugt_35_v2i64: 29475; SSE2: # %bb.0: 29476; SSE2-NEXT: movdqa %xmm0, %xmm1 29477; SSE2-NEXT: psrlw $1, %xmm1 29478; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29479; SSE2-NEXT: psubb %xmm1, %xmm0 29480; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29481; SSE2-NEXT: movdqa %xmm0, %xmm2 29482; SSE2-NEXT: pand %xmm1, %xmm2 29483; SSE2-NEXT: psrlw $2, %xmm0 29484; SSE2-NEXT: pand %xmm1, %xmm0 29485; SSE2-NEXT: paddb %xmm2, %xmm0 29486; SSE2-NEXT: movdqa %xmm0, %xmm1 29487; SSE2-NEXT: psrlw $4, %xmm1 29488; SSE2-NEXT: paddb %xmm0, %xmm1 29489; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29490; SSE2-NEXT: pxor %xmm0, %xmm0 29491; SSE2-NEXT: psadbw %xmm1, %xmm0 29492; SSE2-NEXT: por {{.*}}(%rip), %xmm0 29493; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] 29494; SSE2-NEXT: movdqa %xmm0, %xmm2 29495; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 29496; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29497; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 29498; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29499; SSE2-NEXT: pand 
%xmm3, %xmm1 29500; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29501; SSE2-NEXT: por %xmm1, %xmm0 29502; SSE2-NEXT: retq 29503; 29504; SSE3-LABEL: ugt_35_v2i64: 29505; SSE3: # %bb.0: 29506; SSE3-NEXT: movdqa %xmm0, %xmm1 29507; SSE3-NEXT: psrlw $1, %xmm1 29508; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29509; SSE3-NEXT: psubb %xmm1, %xmm0 29510; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29511; SSE3-NEXT: movdqa %xmm0, %xmm2 29512; SSE3-NEXT: pand %xmm1, %xmm2 29513; SSE3-NEXT: psrlw $2, %xmm0 29514; SSE3-NEXT: pand %xmm1, %xmm0 29515; SSE3-NEXT: paddb %xmm2, %xmm0 29516; SSE3-NEXT: movdqa %xmm0, %xmm1 29517; SSE3-NEXT: psrlw $4, %xmm1 29518; SSE3-NEXT: paddb %xmm0, %xmm1 29519; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29520; SSE3-NEXT: pxor %xmm0, %xmm0 29521; SSE3-NEXT: psadbw %xmm1, %xmm0 29522; SSE3-NEXT: por {{.*}}(%rip), %xmm0 29523; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] 29524; SSE3-NEXT: movdqa %xmm0, %xmm2 29525; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 29526; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29527; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 29528; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29529; SSE3-NEXT: pand %xmm3, %xmm1 29530; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29531; SSE3-NEXT: por %xmm1, %xmm0 29532; SSE3-NEXT: retq 29533; 29534; SSSE3-LABEL: ugt_35_v2i64: 29535; SSSE3: # %bb.0: 29536; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29537; SSSE3-NEXT: movdqa %xmm0, %xmm2 29538; SSSE3-NEXT: pand %xmm1, %xmm2 29539; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29540; SSSE3-NEXT: movdqa %xmm3, %xmm4 29541; SSSE3-NEXT: pshufb %xmm2, %xmm4 29542; SSSE3-NEXT: psrlw $4, %xmm0 29543; SSSE3-NEXT: pand %xmm1, %xmm0 29544; SSSE3-NEXT: pshufb %xmm0, %xmm3 29545; SSSE3-NEXT: paddb %xmm4, %xmm3 29546; SSSE3-NEXT: pxor %xmm0, %xmm0 29547; SSSE3-NEXT: psadbw %xmm3, %xmm0 29548; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 29549; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483683,2147483683] 29550; SSSE3-NEXT: movdqa %xmm0, %xmm2 29551; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 29552; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29553; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 29554; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29555; SSSE3-NEXT: pand %xmm3, %xmm1 29556; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29557; SSSE3-NEXT: por %xmm1, %xmm0 29558; SSSE3-NEXT: retq 29559; 29560; SSE41-LABEL: ugt_35_v2i64: 29561; SSE41: # %bb.0: 29562; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29563; SSE41-NEXT: movdqa %xmm0, %xmm2 29564; SSE41-NEXT: pand %xmm1, %xmm2 29565; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29566; SSE41-NEXT: movdqa %xmm3, %xmm4 29567; SSE41-NEXT: pshufb %xmm2, %xmm4 29568; SSE41-NEXT: psrlw $4, %xmm0 29569; SSE41-NEXT: pand %xmm1, %xmm0 29570; SSE41-NEXT: pshufb %xmm0, %xmm3 29571; SSE41-NEXT: paddb %xmm4, %xmm3 29572; SSE41-NEXT: pxor %xmm0, %xmm0 29573; SSE41-NEXT: psadbw %xmm3, %xmm0 29574; SSE41-NEXT: por {{.*}}(%rip), %xmm0 29575; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] 29576; SSE41-NEXT: movdqa %xmm0, %xmm2 29577; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 29578; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29579; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 29580; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29581; SSE41-NEXT: pand %xmm3, %xmm1 29582; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29583; SSE41-NEXT: por %xmm1, %xmm0 29584; SSE41-NEXT: retq 29585; 29586; AVX1-LABEL: ugt_35_v2i64: 29587; AVX1: # %bb.0: 29588; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29589; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 29590; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29591; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29592; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 29593; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 29594; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29595; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 29596; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 29597; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29598; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29599; AVX1-NEXT: retq 29600; 29601; AVX2-LABEL: ugt_35_v2i64: 29602; AVX2: # %bb.0: 29603; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29604; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 29605; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29606; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29607; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 29608; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 29609; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29610; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29611; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 29612; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29613; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29614; AVX2-NEXT: retq 29615; 29616; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64: 29617; AVX512VPOPCNTDQ: # %bb.0: 29618; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29619; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 29620; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29621; AVX512VPOPCNTDQ-NEXT: vzeroupper 29622; AVX512VPOPCNTDQ-NEXT: retq 29623; 29624; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64: 29625; AVX512VPOPCNTDQVL: # %bb.0: 29626; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 29627; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 29628; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29629; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29630; AVX512VPOPCNTDQVL-NEXT: retq 29631; 29632; BITALG_NOVLX-LABEL: ugt_35_v2i64: 29633; BITALG_NOVLX: # %bb.0: 29634; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29635; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 29636; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 29637; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29638; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29639; BITALG_NOVLX-NEXT: vzeroupper 29640; BITALG_NOVLX-NEXT: retq 
29641; 29642; BITALG-LABEL: ugt_35_v2i64: 29643; BITALG: # %bb.0: 29644; BITALG-NEXT: vpopcntb %xmm0, %xmm0 29645; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 29646; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29647; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 29648; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29649; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29650; BITALG-NEXT: retq 29651 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 29652 %3 = icmp ugt <2 x i64> %2, <i64 35, i64 35> 29653 %4 = sext <2 x i1> %3 to <2 x i64> 29654 ret <2 x i64> %4 29655} 29656 29657define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { 29658; SSE2-LABEL: ult_36_v2i64: 29659; SSE2: # %bb.0: 29660; SSE2-NEXT: movdqa %xmm0, %xmm1 29661; SSE2-NEXT: psrlw $1, %xmm1 29662; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29663; SSE2-NEXT: psubb %xmm1, %xmm0 29664; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29665; SSE2-NEXT: movdqa %xmm0, %xmm2 29666; SSE2-NEXT: pand %xmm1, %xmm2 29667; SSE2-NEXT: psrlw $2, %xmm0 29668; SSE2-NEXT: pand %xmm1, %xmm0 29669; SSE2-NEXT: paddb %xmm2, %xmm0 29670; SSE2-NEXT: movdqa %xmm0, %xmm1 29671; SSE2-NEXT: psrlw $4, %xmm1 29672; SSE2-NEXT: paddb %xmm0, %xmm1 29673; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29674; SSE2-NEXT: pxor %xmm0, %xmm0 29675; SSE2-NEXT: psadbw %xmm1, %xmm0 29676; SSE2-NEXT: por {{.*}}(%rip), %xmm0 29677; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29678; SSE2-NEXT: movdqa %xmm1, %xmm2 29679; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 29680; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29681; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 29682; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29683; SSE2-NEXT: pand %xmm3, %xmm1 29684; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29685; SSE2-NEXT: por %xmm1, %xmm0 29686; SSE2-NEXT: retq 29687; 29688; SSE3-LABEL: ult_36_v2i64: 29689; SSE3: # %bb.0: 29690; SSE3-NEXT: movdqa %xmm0, %xmm1 29691; SSE3-NEXT: psrlw $1, %xmm1 29692; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29693; 
SSE3-NEXT: psubb %xmm1, %xmm0 29694; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29695; SSE3-NEXT: movdqa %xmm0, %xmm2 29696; SSE3-NEXT: pand %xmm1, %xmm2 29697; SSE3-NEXT: psrlw $2, %xmm0 29698; SSE3-NEXT: pand %xmm1, %xmm0 29699; SSE3-NEXT: paddb %xmm2, %xmm0 29700; SSE3-NEXT: movdqa %xmm0, %xmm1 29701; SSE3-NEXT: psrlw $4, %xmm1 29702; SSE3-NEXT: paddb %xmm0, %xmm1 29703; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29704; SSE3-NEXT: pxor %xmm0, %xmm0 29705; SSE3-NEXT: psadbw %xmm1, %xmm0 29706; SSE3-NEXT: por {{.*}}(%rip), %xmm0 29707; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29708; SSE3-NEXT: movdqa %xmm1, %xmm2 29709; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 29710; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29711; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 29712; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29713; SSE3-NEXT: pand %xmm3, %xmm1 29714; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29715; SSE3-NEXT: por %xmm1, %xmm0 29716; SSE3-NEXT: retq 29717; 29718; SSSE3-LABEL: ult_36_v2i64: 29719; SSSE3: # %bb.0: 29720; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29721; SSSE3-NEXT: movdqa %xmm0, %xmm2 29722; SSSE3-NEXT: pand %xmm1, %xmm2 29723; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29724; SSSE3-NEXT: movdqa %xmm3, %xmm4 29725; SSSE3-NEXT: pshufb %xmm2, %xmm4 29726; SSSE3-NEXT: psrlw $4, %xmm0 29727; SSSE3-NEXT: pand %xmm1, %xmm0 29728; SSSE3-NEXT: pshufb %xmm0, %xmm3 29729; SSSE3-NEXT: paddb %xmm4, %xmm3 29730; SSSE3-NEXT: pxor %xmm0, %xmm0 29731; SSSE3-NEXT: psadbw %xmm3, %xmm0 29732; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 29733; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29734; SSSE3-NEXT: movdqa %xmm1, %xmm2 29735; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 29736; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29737; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 29738; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29739; SSSE3-NEXT: pand %xmm3, %xmm1 
29740; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29741; SSSE3-NEXT: por %xmm1, %xmm0 29742; SSSE3-NEXT: retq 29743; 29744; SSE41-LABEL: ult_36_v2i64: 29745; SSE41: # %bb.0: 29746; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29747; SSE41-NEXT: movdqa %xmm0, %xmm2 29748; SSE41-NEXT: pand %xmm1, %xmm2 29749; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29750; SSE41-NEXT: movdqa %xmm3, %xmm4 29751; SSE41-NEXT: pshufb %xmm2, %xmm4 29752; SSE41-NEXT: psrlw $4, %xmm0 29753; SSE41-NEXT: pand %xmm1, %xmm0 29754; SSE41-NEXT: pshufb %xmm0, %xmm3 29755; SSE41-NEXT: paddb %xmm4, %xmm3 29756; SSE41-NEXT: pxor %xmm0, %xmm0 29757; SSE41-NEXT: psadbw %xmm3, %xmm0 29758; SSE41-NEXT: por {{.*}}(%rip), %xmm0 29759; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29760; SSE41-NEXT: movdqa %xmm1, %xmm2 29761; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 29762; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29763; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 29764; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29765; SSE41-NEXT: pand %xmm3, %xmm1 29766; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29767; SSE41-NEXT: por %xmm1, %xmm0 29768; SSE41-NEXT: retq 29769; 29770; AVX1-LABEL: ult_36_v2i64: 29771; AVX1: # %bb.0: 29772; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29773; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 29774; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29775; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29776; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 29777; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 29778; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29779; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29780; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 29781; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29782; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] 29783; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29784; AVX1-NEXT: retq 29785; 29786; AVX2-LABEL: ult_36_v2i64: 29787; AVX2: # %bb.0: 29788; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29789; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 29790; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29791; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29792; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 29793; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 29794; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29795; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29796; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 29797; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29798; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] 29799; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29800; AVX2-NEXT: retq 29801; 29802; AVX512VPOPCNTDQ-LABEL: ult_36_v2i64: 29803; AVX512VPOPCNTDQ: # %bb.0: 29804; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29805; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 29806; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] 29807; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29808; AVX512VPOPCNTDQ-NEXT: vzeroupper 29809; AVX512VPOPCNTDQ-NEXT: retq 29810; 29811; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64: 29812; AVX512VPOPCNTDQVL: # %bb.0: 29813; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 29814; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 29815; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29816; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29817; AVX512VPOPCNTDQVL-NEXT: retq 29818; 29819; BITALG_NOVLX-LABEL: ult_36_v2i64: 29820; BITALG_NOVLX: # %bb.0: 29821; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29822; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 29823; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 29824; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29825; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] 29826; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 29827; BITALG_NOVLX-NEXT: vzeroupper 29828; BITALG_NOVLX-NEXT: retq 29829; 29830; BITALG-LABEL: ult_36_v2i64: 29831; BITALG: # %bb.0: 29832; BITALG-NEXT: vpopcntb %xmm0, %xmm0 29833; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 29834; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29835; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 29836; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 29837; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 29838; BITALG-NEXT: retq 29839 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 29840 %3 = icmp ult <2 x i64> %2, <i64 36, i64 36> 29841 %4 = sext <2 x i1> %3 to <2 x i64> 29842 ret <2 x i64> %4 29843} 29844 29845define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { 29846; SSE2-LABEL: ugt_36_v2i64: 29847; SSE2: # %bb.0: 29848; SSE2-NEXT: movdqa %xmm0, %xmm1 29849; SSE2-NEXT: psrlw $1, %xmm1 29850; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29851; SSE2-NEXT: psubb %xmm1, %xmm0 29852; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29853; SSE2-NEXT: movdqa %xmm0, %xmm2 29854; SSE2-NEXT: pand %xmm1, %xmm2 29855; SSE2-NEXT: psrlw $2, %xmm0 29856; SSE2-NEXT: pand %xmm1, %xmm0 29857; SSE2-NEXT: paddb %xmm2, %xmm0 29858; SSE2-NEXT: movdqa %xmm0, %xmm1 29859; SSE2-NEXT: psrlw $4, %xmm1 29860; SSE2-NEXT: paddb %xmm0, %xmm1 29861; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 29862; SSE2-NEXT: pxor %xmm0, %xmm0 29863; SSE2-NEXT: psadbw %xmm1, %xmm0 29864; SSE2-NEXT: por {{.*}}(%rip), %xmm0 29865; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29866; SSE2-NEXT: movdqa %xmm0, %xmm2 29867; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 29868; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29869; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 29870; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29871; SSE2-NEXT: pand %xmm3, %xmm1 29872; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29873; SSE2-NEXT: por %xmm1, %xmm0 29874; SSE2-NEXT: retq 29875; 29876; SSE3-LABEL: ugt_36_v2i64: 29877; SSE3: # %bb.0: 29878; SSE3-NEXT: movdqa %xmm0, %xmm1 29879; SSE3-NEXT: psrlw $1, %xmm1 29880; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29881; SSE3-NEXT: psubb %xmm1, %xmm0 29882; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 29883; SSE3-NEXT: movdqa %xmm0, %xmm2 29884; SSE3-NEXT: pand %xmm1, %xmm2 29885; SSE3-NEXT: psrlw $2, %xmm0 29886; SSE3-NEXT: pand %xmm1, %xmm0 29887; SSE3-NEXT: paddb %xmm2, %xmm0 29888; SSE3-NEXT: movdqa %xmm0, %xmm1 29889; SSE3-NEXT: psrlw $4, %xmm1 29890; SSE3-NEXT: paddb %xmm0, %xmm1 29891; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 29892; SSE3-NEXT: pxor %xmm0, %xmm0 29893; SSE3-NEXT: psadbw %xmm1, %xmm0 29894; SSE3-NEXT: por {{.*}}(%rip), %xmm0 29895; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29896; SSE3-NEXT: movdqa %xmm0, %xmm2 29897; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 29898; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29899; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 29900; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29901; SSE3-NEXT: pand %xmm3, %xmm1 29902; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29903; SSE3-NEXT: por %xmm1, %xmm0 29904; SSE3-NEXT: retq 29905; 29906; SSSE3-LABEL: ugt_36_v2i64: 29907; SSSE3: # %bb.0: 29908; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29909; SSSE3-NEXT: movdqa %xmm0, %xmm2 29910; SSSE3-NEXT: pand %xmm1, %xmm2 29911; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29912; SSSE3-NEXT: movdqa %xmm3, %xmm4 29913; SSSE3-NEXT: pshufb %xmm2, %xmm4 29914; SSSE3-NEXT: psrlw $4, %xmm0 29915; SSSE3-NEXT: pand %xmm1, %xmm0 29916; SSSE3-NEXT: pshufb %xmm0, %xmm3 29917; SSSE3-NEXT: paddb %xmm4, %xmm3 29918; SSSE3-NEXT: pxor %xmm0, %xmm0 29919; SSSE3-NEXT: psadbw %xmm3, %xmm0 29920; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 29921; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29922; SSSE3-NEXT: movdqa %xmm0, %xmm2 29923; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 29924; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29925; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 29926; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29927; SSSE3-NEXT: pand %xmm3, %xmm1 29928; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29929; 
SSSE3-NEXT: por %xmm1, %xmm0 29930; SSSE3-NEXT: retq 29931; 29932; SSE41-LABEL: ugt_36_v2i64: 29933; SSE41: # %bb.0: 29934; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29935; SSE41-NEXT: movdqa %xmm0, %xmm2 29936; SSE41-NEXT: pand %xmm1, %xmm2 29937; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29938; SSE41-NEXT: movdqa %xmm3, %xmm4 29939; SSE41-NEXT: pshufb %xmm2, %xmm4 29940; SSE41-NEXT: psrlw $4, %xmm0 29941; SSE41-NEXT: pand %xmm1, %xmm0 29942; SSE41-NEXT: pshufb %xmm0, %xmm3 29943; SSE41-NEXT: paddb %xmm4, %xmm3 29944; SSE41-NEXT: pxor %xmm0, %xmm0 29945; SSE41-NEXT: psadbw %xmm3, %xmm0 29946; SSE41-NEXT: por {{.*}}(%rip), %xmm0 29947; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] 29948; SSE41-NEXT: movdqa %xmm0, %xmm2 29949; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 29950; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 29951; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 29952; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 29953; SSE41-NEXT: pand %xmm3, %xmm1 29954; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 29955; SSE41-NEXT: por %xmm1, %xmm0 29956; SSE41-NEXT: retq 29957; 29958; AVX1-LABEL: ugt_36_v2i64: 29959; AVX1: # %bb.0: 29960; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29961; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 29962; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29963; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29964; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 29965; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 29966; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29967; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29968; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 29969; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29970; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29971; AVX1-NEXT: retq 29972; 29973; AVX2-LABEL: ugt_36_v2i64: 29974; AVX2: # %bb.0: 29975; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 29976; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 29977; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 29978; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 29979; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 29980; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 29981; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 29982; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 29983; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 29984; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 29985; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29986; AVX2-NEXT: retq 29987; 29988; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64: 29989; AVX512VPOPCNTDQ: # %bb.0: 29990; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 29991; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 29992; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 29993; AVX512VPOPCNTDQ-NEXT: vzeroupper 29994; AVX512VPOPCNTDQ-NEXT: retq 29995; 29996; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64: 29997; AVX512VPOPCNTDQVL: # %bb.0: 29998; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 29999; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 30000; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30001; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30002; AVX512VPOPCNTDQVL-NEXT: retq 30003; 30004; BITALG_NOVLX-LABEL: ugt_36_v2i64: 30005; BITALG_NOVLX: # %bb.0: 30006; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30007; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 30008; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 30009; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30010; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30011; BITALG_NOVLX-NEXT: vzeroupper 30012; BITALG_NOVLX-NEXT: retq 30013; 30014; BITALG-LABEL: ugt_36_v2i64: 30015; BITALG: # %bb.0: 30016; BITALG-NEXT: vpopcntb %xmm0, %xmm0 30017; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 30018; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30019; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 30020; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30021; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30022; 
BITALG-NEXT: retq 30023 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 30024 %3 = icmp ugt <2 x i64> %2, <i64 36, i64 36> 30025 %4 = sext <2 x i1> %3 to <2 x i64> 30026 ret <2 x i64> %4 30027} 30028 30029define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { 30030; SSE2-LABEL: ult_37_v2i64: 30031; SSE2: # %bb.0: 30032; SSE2-NEXT: movdqa %xmm0, %xmm1 30033; SSE2-NEXT: psrlw $1, %xmm1 30034; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30035; SSE2-NEXT: psubb %xmm1, %xmm0 30036; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30037; SSE2-NEXT: movdqa %xmm0, %xmm2 30038; SSE2-NEXT: pand %xmm1, %xmm2 30039; SSE2-NEXT: psrlw $2, %xmm0 30040; SSE2-NEXT: pand %xmm1, %xmm0 30041; SSE2-NEXT: paddb %xmm2, %xmm0 30042; SSE2-NEXT: movdqa %xmm0, %xmm1 30043; SSE2-NEXT: psrlw $4, %xmm1 30044; SSE2-NEXT: paddb %xmm0, %xmm1 30045; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30046; SSE2-NEXT: pxor %xmm0, %xmm0 30047; SSE2-NEXT: psadbw %xmm1, %xmm0 30048; SSE2-NEXT: por {{.*}}(%rip), %xmm0 30049; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30050; SSE2-NEXT: movdqa %xmm1, %xmm2 30051; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 30052; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30053; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 30054; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30055; SSE2-NEXT: pand %xmm3, %xmm1 30056; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30057; SSE2-NEXT: por %xmm1, %xmm0 30058; SSE2-NEXT: retq 30059; 30060; SSE3-LABEL: ult_37_v2i64: 30061; SSE3: # %bb.0: 30062; SSE3-NEXT: movdqa %xmm0, %xmm1 30063; SSE3-NEXT: psrlw $1, %xmm1 30064; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30065; SSE3-NEXT: psubb %xmm1, %xmm0 30066; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30067; SSE3-NEXT: movdqa %xmm0, %xmm2 30068; SSE3-NEXT: pand %xmm1, %xmm2 30069; SSE3-NEXT: psrlw $2, %xmm0 30070; SSE3-NEXT: pand %xmm1, %xmm0 30071; SSE3-NEXT: paddb %xmm2, %xmm0 30072; SSE3-NEXT: movdqa %xmm0, %xmm1 30073; SSE3-NEXT: psrlw $4, 
%xmm1 30074; SSE3-NEXT: paddb %xmm0, %xmm1 30075; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30076; SSE3-NEXT: pxor %xmm0, %xmm0 30077; SSE3-NEXT: psadbw %xmm1, %xmm0 30078; SSE3-NEXT: por {{.*}}(%rip), %xmm0 30079; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30080; SSE3-NEXT: movdqa %xmm1, %xmm2 30081; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 30082; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30083; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 30084; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30085; SSE3-NEXT: pand %xmm3, %xmm1 30086; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30087; SSE3-NEXT: por %xmm1, %xmm0 30088; SSE3-NEXT: retq 30089; 30090; SSSE3-LABEL: ult_37_v2i64: 30091; SSSE3: # %bb.0: 30092; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30093; SSSE3-NEXT: movdqa %xmm0, %xmm2 30094; SSSE3-NEXT: pand %xmm1, %xmm2 30095; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30096; SSSE3-NEXT: movdqa %xmm3, %xmm4 30097; SSSE3-NEXT: pshufb %xmm2, %xmm4 30098; SSSE3-NEXT: psrlw $4, %xmm0 30099; SSSE3-NEXT: pand %xmm1, %xmm0 30100; SSSE3-NEXT: pshufb %xmm0, %xmm3 30101; SSSE3-NEXT: paddb %xmm4, %xmm3 30102; SSSE3-NEXT: pxor %xmm0, %xmm0 30103; SSSE3-NEXT: psadbw %xmm3, %xmm0 30104; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 30105; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30106; SSSE3-NEXT: movdqa %xmm1, %xmm2 30107; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 30108; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30109; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 30110; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30111; SSSE3-NEXT: pand %xmm3, %xmm1 30112; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30113; SSSE3-NEXT: por %xmm1, %xmm0 30114; SSSE3-NEXT: retq 30115; 30116; SSE41-LABEL: ult_37_v2i64: 30117; SSE41: # %bb.0: 30118; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30119; SSE41-NEXT: movdqa %xmm0, %xmm2 30120; SSE41-NEXT: pand %xmm1, %xmm2 30121; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30122; SSE41-NEXT: movdqa %xmm3, %xmm4 30123; SSE41-NEXT: pshufb %xmm2, %xmm4 30124; SSE41-NEXT: psrlw $4, %xmm0 30125; SSE41-NEXT: pand %xmm1, %xmm0 30126; SSE41-NEXT: pshufb %xmm0, %xmm3 30127; SSE41-NEXT: paddb %xmm4, %xmm3 30128; SSE41-NEXT: pxor %xmm0, %xmm0 30129; SSE41-NEXT: psadbw %xmm3, %xmm0 30130; SSE41-NEXT: por {{.*}}(%rip), %xmm0 30131; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30132; SSE41-NEXT: movdqa %xmm1, %xmm2 30133; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 30134; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30135; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 30136; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30137; SSE41-NEXT: pand %xmm3, %xmm1 30138; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30139; SSE41-NEXT: por %xmm1, %xmm0 30140; SSE41-NEXT: retq 30141; 30142; AVX1-LABEL: ult_37_v2i64: 30143; AVX1: # %bb.0: 30144; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30145; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 30146; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30147; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30148; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 30149; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 30150; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30151; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30152; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 30153; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30154; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] 30155; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30156; AVX1-NEXT: retq 30157; 30158; AVX2-LABEL: ult_37_v2i64: 30159; AVX2: # %bb.0: 30160; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30161; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 30162; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30163; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30164; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 30165; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 30166; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 30167; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30168; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 30169; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30170; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] 30171; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30172; AVX2-NEXT: retq 30173; 30174; AVX512VPOPCNTDQ-LABEL: ult_37_v2i64: 30175; AVX512VPOPCNTDQ: # %bb.0: 30176; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30177; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 30178; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] 30179; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30180; AVX512VPOPCNTDQ-NEXT: vzeroupper 30181; AVX512VPOPCNTDQ-NEXT: retq 30182; 30183; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64: 30184; AVX512VPOPCNTDQVL: # %bb.0: 30185; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 30186; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 30187; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30188; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30189; AVX512VPOPCNTDQVL-NEXT: retq 30190; 30191; BITALG_NOVLX-LABEL: ult_37_v2i64: 30192; BITALG_NOVLX: # %bb.0: 30193; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30194; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 30195; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 30196; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30197; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] 30198; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30199; BITALG_NOVLX-NEXT: vzeroupper 30200; BITALG_NOVLX-NEXT: retq 30201; 30202; BITALG-LABEL: ult_37_v2i64: 30203; BITALG: # %bb.0: 30204; BITALG-NEXT: vpopcntb %xmm0, %xmm0 30205; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 30206; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30207; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 30208; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30209; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30210; BITALG-NEXT: retq 30211 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 30212 %3 = icmp ult <2 x i64> %2, <i64 
37, i64 37> 30213 %4 = sext <2 x i1> %3 to <2 x i64> 30214 ret <2 x i64> %4 30215} 30216 30217define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { 30218; SSE2-LABEL: ugt_37_v2i64: 30219; SSE2: # %bb.0: 30220; SSE2-NEXT: movdqa %xmm0, %xmm1 30221; SSE2-NEXT: psrlw $1, %xmm1 30222; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30223; SSE2-NEXT: psubb %xmm1, %xmm0 30224; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30225; SSE2-NEXT: movdqa %xmm0, %xmm2 30226; SSE2-NEXT: pand %xmm1, %xmm2 30227; SSE2-NEXT: psrlw $2, %xmm0 30228; SSE2-NEXT: pand %xmm1, %xmm0 30229; SSE2-NEXT: paddb %xmm2, %xmm0 30230; SSE2-NEXT: movdqa %xmm0, %xmm1 30231; SSE2-NEXT: psrlw $4, %xmm1 30232; SSE2-NEXT: paddb %xmm0, %xmm1 30233; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30234; SSE2-NEXT: pxor %xmm0, %xmm0 30235; SSE2-NEXT: psadbw %xmm1, %xmm0 30236; SSE2-NEXT: por {{.*}}(%rip), %xmm0 30237; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30238; SSE2-NEXT: movdqa %xmm0, %xmm2 30239; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 30240; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30241; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 30242; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30243; SSE2-NEXT: pand %xmm3, %xmm1 30244; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30245; SSE2-NEXT: por %xmm1, %xmm0 30246; SSE2-NEXT: retq 30247; 30248; SSE3-LABEL: ugt_37_v2i64: 30249; SSE3: # %bb.0: 30250; SSE3-NEXT: movdqa %xmm0, %xmm1 30251; SSE3-NEXT: psrlw $1, %xmm1 30252; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30253; SSE3-NEXT: psubb %xmm1, %xmm0 30254; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30255; SSE3-NEXT: movdqa %xmm0, %xmm2 30256; SSE3-NEXT: pand %xmm1, %xmm2 30257; SSE3-NEXT: psrlw $2, %xmm0 30258; SSE3-NEXT: pand %xmm1, %xmm0 30259; SSE3-NEXT: paddb %xmm2, %xmm0 30260; SSE3-NEXT: movdqa %xmm0, %xmm1 30261; SSE3-NEXT: psrlw $4, %xmm1 30262; SSE3-NEXT: paddb %xmm0, %xmm1 30263; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30264; SSE3-NEXT: pxor %xmm0, 
%xmm0 30265; SSE3-NEXT: psadbw %xmm1, %xmm0 30266; SSE3-NEXT: por {{.*}}(%rip), %xmm0 30267; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30268; SSE3-NEXT: movdqa %xmm0, %xmm2 30269; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 30270; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30271; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 30272; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30273; SSE3-NEXT: pand %xmm3, %xmm1 30274; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30275; SSE3-NEXT: por %xmm1, %xmm0 30276; SSE3-NEXT: retq 30277; 30278; SSSE3-LABEL: ugt_37_v2i64: 30279; SSSE3: # %bb.0: 30280; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30281; SSSE3-NEXT: movdqa %xmm0, %xmm2 30282; SSSE3-NEXT: pand %xmm1, %xmm2 30283; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30284; SSSE3-NEXT: movdqa %xmm3, %xmm4 30285; SSSE3-NEXT: pshufb %xmm2, %xmm4 30286; SSSE3-NEXT: psrlw $4, %xmm0 30287; SSSE3-NEXT: pand %xmm1, %xmm0 30288; SSSE3-NEXT: pshufb %xmm0, %xmm3 30289; SSSE3-NEXT: paddb %xmm4, %xmm3 30290; SSSE3-NEXT: pxor %xmm0, %xmm0 30291; SSSE3-NEXT: psadbw %xmm3, %xmm0 30292; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 30293; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30294; SSSE3-NEXT: movdqa %xmm0, %xmm2 30295; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 30296; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30297; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 30298; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30299; SSSE3-NEXT: pand %xmm3, %xmm1 30300; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30301; SSSE3-NEXT: por %xmm1, %xmm0 30302; SSSE3-NEXT: retq 30303; 30304; SSE41-LABEL: ugt_37_v2i64: 30305; SSE41: # %bb.0: 30306; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30307; SSE41-NEXT: movdqa %xmm0, %xmm2 30308; SSE41-NEXT: pand %xmm1, %xmm2 30309; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30310; SSE41-NEXT: movdqa %xmm3, %xmm4 30311; SSE41-NEXT: 
pshufb %xmm2, %xmm4 30312; SSE41-NEXT: psrlw $4, %xmm0 30313; SSE41-NEXT: pand %xmm1, %xmm0 30314; SSE41-NEXT: pshufb %xmm0, %xmm3 30315; SSE41-NEXT: paddb %xmm4, %xmm3 30316; SSE41-NEXT: pxor %xmm0, %xmm0 30317; SSE41-NEXT: psadbw %xmm3, %xmm0 30318; SSE41-NEXT: por {{.*}}(%rip), %xmm0 30319; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] 30320; SSE41-NEXT: movdqa %xmm0, %xmm2 30321; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 30322; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30323; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 30324; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30325; SSE41-NEXT: pand %xmm3, %xmm1 30326; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30327; SSE41-NEXT: por %xmm1, %xmm0 30328; SSE41-NEXT: retq 30329; 30330; AVX1-LABEL: ugt_37_v2i64: 30331; AVX1: # %bb.0: 30332; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30333; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 30334; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30335; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30336; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 30337; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 30338; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30339; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30340; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 30341; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30342; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30343; AVX1-NEXT: retq 30344; 30345; AVX2-LABEL: ugt_37_v2i64: 30346; AVX2: # %bb.0: 30347; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30348; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 30349; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30350; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30351; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 30352; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 30353; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30354; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30355; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 30356; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30357; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30358; AVX2-NEXT: retq 30359; 30360; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64: 30361; AVX512VPOPCNTDQ: # %bb.0: 30362; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30363; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 30364; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30365; AVX512VPOPCNTDQ-NEXT: vzeroupper 30366; AVX512VPOPCNTDQ-NEXT: retq 30367; 30368; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64: 30369; AVX512VPOPCNTDQVL: # %bb.0: 30370; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 30371; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 30372; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30373; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30374; AVX512VPOPCNTDQVL-NEXT: retq 30375; 30376; BITALG_NOVLX-LABEL: ugt_37_v2i64: 30377; BITALG_NOVLX: # %bb.0: 30378; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30379; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 30380; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 30381; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30382; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30383; BITALG_NOVLX-NEXT: vzeroupper 30384; BITALG_NOVLX-NEXT: retq 30385; 30386; BITALG-LABEL: ugt_37_v2i64: 30387; BITALG: # %bb.0: 30388; BITALG-NEXT: vpopcntb %xmm0, %xmm0 30389; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 30390; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30391; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 30392; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30393; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30394; BITALG-NEXT: retq 30395 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 30396 %3 = icmp ugt <2 x i64> %2, <i64 37, i64 37> 30397 %4 = sext <2 x i1> %3 to <2 x i64> 30398 ret <2 x i64> %4 30399} 30400 30401define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { 30402; SSE2-LABEL: ult_38_v2i64: 30403; SSE2: # %bb.0: 30404; SSE2-NEXT: movdqa %xmm0, %xmm1 30405; SSE2-NEXT: psrlw $1, %xmm1 30406; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 30407; SSE2-NEXT: psubb %xmm1, %xmm0 30408; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30409; SSE2-NEXT: movdqa %xmm0, %xmm2 30410; SSE2-NEXT: pand %xmm1, %xmm2 30411; SSE2-NEXT: psrlw $2, %xmm0 30412; SSE2-NEXT: pand %xmm1, %xmm0 30413; SSE2-NEXT: paddb %xmm2, %xmm0 30414; SSE2-NEXT: movdqa %xmm0, %xmm1 30415; SSE2-NEXT: psrlw $4, %xmm1 30416; SSE2-NEXT: paddb %xmm0, %xmm1 30417; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30418; SSE2-NEXT: pxor %xmm0, %xmm0 30419; SSE2-NEXT: psadbw %xmm1, %xmm0 30420; SSE2-NEXT: por {{.*}}(%rip), %xmm0 30421; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30422; SSE2-NEXT: movdqa %xmm1, %xmm2 30423; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 30424; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30425; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 30426; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30427; SSE2-NEXT: pand %xmm3, %xmm1 30428; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30429; SSE2-NEXT: por %xmm1, %xmm0 30430; SSE2-NEXT: retq 30431; 30432; SSE3-LABEL: ult_38_v2i64: 30433; SSE3: # %bb.0: 30434; SSE3-NEXT: movdqa %xmm0, %xmm1 30435; SSE3-NEXT: psrlw $1, %xmm1 30436; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30437; SSE3-NEXT: psubb %xmm1, %xmm0 30438; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30439; SSE3-NEXT: movdqa %xmm0, %xmm2 30440; SSE3-NEXT: pand %xmm1, %xmm2 30441; SSE3-NEXT: psrlw $2, %xmm0 30442; SSE3-NEXT: pand %xmm1, %xmm0 30443; SSE3-NEXT: paddb %xmm2, %xmm0 30444; SSE3-NEXT: movdqa %xmm0, %xmm1 30445; SSE3-NEXT: psrlw $4, %xmm1 30446; SSE3-NEXT: paddb %xmm0, %xmm1 30447; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30448; SSE3-NEXT: pxor %xmm0, %xmm0 30449; SSE3-NEXT: psadbw %xmm1, %xmm0 30450; SSE3-NEXT: por {{.*}}(%rip), %xmm0 30451; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30452; SSE3-NEXT: movdqa %xmm1, %xmm2 30453; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 30454; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30455; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 30456; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30457; SSE3-NEXT: pand %xmm3, %xmm1 30458; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30459; SSE3-NEXT: por %xmm1, %xmm0 30460; SSE3-NEXT: retq 30461; 30462; SSSE3-LABEL: ult_38_v2i64: 30463; SSSE3: # %bb.0: 30464; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30465; SSSE3-NEXT: movdqa %xmm0, %xmm2 30466; SSSE3-NEXT: pand %xmm1, %xmm2 30467; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30468; SSSE3-NEXT: movdqa %xmm3, %xmm4 30469; SSSE3-NEXT: pshufb %xmm2, %xmm4 30470; SSSE3-NEXT: psrlw $4, %xmm0 30471; SSSE3-NEXT: pand %xmm1, %xmm0 30472; SSSE3-NEXT: pshufb %xmm0, %xmm3 30473; SSSE3-NEXT: paddb %xmm4, %xmm3 30474; SSSE3-NEXT: pxor %xmm0, %xmm0 30475; SSSE3-NEXT: psadbw %xmm3, %xmm0 30476; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 30477; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30478; SSSE3-NEXT: movdqa %xmm1, %xmm2 30479; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 30480; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30481; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 30482; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30483; SSSE3-NEXT: pand %xmm3, %xmm1 30484; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30485; SSSE3-NEXT: por %xmm1, %xmm0 30486; SSSE3-NEXT: retq 30487; 30488; SSE41-LABEL: ult_38_v2i64: 30489; SSE41: # %bb.0: 30490; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30491; SSE41-NEXT: movdqa %xmm0, %xmm2 30492; SSE41-NEXT: pand %xmm1, %xmm2 30493; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30494; SSE41-NEXT: movdqa %xmm3, %xmm4 30495; SSE41-NEXT: pshufb %xmm2, %xmm4 30496; SSE41-NEXT: psrlw $4, %xmm0 30497; SSE41-NEXT: pand %xmm1, %xmm0 30498; SSE41-NEXT: pshufb %xmm0, %xmm3 30499; SSE41-NEXT: paddb %xmm4, %xmm3 30500; SSE41-NEXT: pxor %xmm0, %xmm0 30501; SSE41-NEXT: psadbw %xmm3, %xmm0 30502; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
30503; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30504; SSE41-NEXT: movdqa %xmm1, %xmm2 30505; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 30506; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30507; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 30508; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30509; SSE41-NEXT: pand %xmm3, %xmm1 30510; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30511; SSE41-NEXT: por %xmm1, %xmm0 30512; SSE41-NEXT: retq 30513; 30514; AVX1-LABEL: ult_38_v2i64: 30515; AVX1: # %bb.0: 30516; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30517; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 30518; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30519; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30520; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 30521; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 30522; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30523; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30524; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 30525; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30526; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] 30527; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30528; AVX1-NEXT: retq 30529; 30530; AVX2-LABEL: ult_38_v2i64: 30531; AVX2: # %bb.0: 30532; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30533; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 30534; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30535; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30536; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 30537; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 30538; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30539; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30540; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 30541; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30542; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] 30543; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30544; AVX2-NEXT: retq 30545; 30546; AVX512VPOPCNTDQ-LABEL: ult_38_v2i64: 30547; AVX512VPOPCNTDQ: # %bb.0: 30548; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 30549; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 30550; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] 30551; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30552; AVX512VPOPCNTDQ-NEXT: vzeroupper 30553; AVX512VPOPCNTDQ-NEXT: retq 30554; 30555; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64: 30556; AVX512VPOPCNTDQVL: # %bb.0: 30557; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 30558; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 30559; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30560; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30561; AVX512VPOPCNTDQVL-NEXT: retq 30562; 30563; BITALG_NOVLX-LABEL: ult_38_v2i64: 30564; BITALG_NOVLX: # %bb.0: 30565; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30566; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 30567; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 30568; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30569; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] 30570; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30571; BITALG_NOVLX-NEXT: vzeroupper 30572; BITALG_NOVLX-NEXT: retq 30573; 30574; BITALG-LABEL: ult_38_v2i64: 30575; BITALG: # %bb.0: 30576; BITALG-NEXT: vpopcntb %xmm0, %xmm0 30577; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 30578; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30579; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 30580; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30581; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30582; BITALG-NEXT: retq 30583 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 30584 %3 = icmp ult <2 x i64> %2, <i64 38, i64 38> 30585 %4 = sext <2 x i1> %3 to <2 x i64> 30586 ret <2 x i64> %4 30587} 30588 30589define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { 30590; SSE2-LABEL: ugt_38_v2i64: 30591; SSE2: # %bb.0: 30592; SSE2-NEXT: movdqa %xmm0, %xmm1 30593; SSE2-NEXT: psrlw $1, %xmm1 30594; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30595; SSE2-NEXT: psubb %xmm1, %xmm0 30596; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30597; SSE2-NEXT: movdqa %xmm0, %xmm2 30598; SSE2-NEXT: pand %xmm1, %xmm2 30599; SSE2-NEXT: psrlw $2, %xmm0 30600; SSE2-NEXT: pand %xmm1, %xmm0 30601; SSE2-NEXT: paddb %xmm2, %xmm0 30602; SSE2-NEXT: movdqa %xmm0, %xmm1 30603; SSE2-NEXT: psrlw $4, %xmm1 30604; SSE2-NEXT: paddb %xmm0, %xmm1 30605; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30606; SSE2-NEXT: pxor %xmm0, %xmm0 30607; SSE2-NEXT: psadbw %xmm1, %xmm0 30608; SSE2-NEXT: por {{.*}}(%rip), %xmm0 30609; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30610; SSE2-NEXT: movdqa %xmm0, %xmm2 30611; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 30612; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30613; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 30614; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30615; SSE2-NEXT: pand %xmm3, %xmm1 30616; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30617; SSE2-NEXT: por %xmm1, %xmm0 30618; SSE2-NEXT: retq 30619; 30620; SSE3-LABEL: ugt_38_v2i64: 30621; SSE3: # %bb.0: 30622; SSE3-NEXT: movdqa %xmm0, %xmm1 30623; SSE3-NEXT: psrlw $1, %xmm1 30624; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30625; SSE3-NEXT: psubb %xmm1, %xmm0 30626; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30627; SSE3-NEXT: movdqa %xmm0, %xmm2 30628; SSE3-NEXT: pand %xmm1, %xmm2 30629; SSE3-NEXT: psrlw $2, %xmm0 30630; SSE3-NEXT: pand %xmm1, %xmm0 30631; SSE3-NEXT: paddb %xmm2, %xmm0 30632; SSE3-NEXT: movdqa %xmm0, %xmm1 30633; SSE3-NEXT: psrlw $4, %xmm1 30634; SSE3-NEXT: paddb %xmm0, %xmm1 30635; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30636; SSE3-NEXT: pxor %xmm0, %xmm0 30637; SSE3-NEXT: psadbw %xmm1, %xmm0 30638; SSE3-NEXT: por {{.*}}(%rip), %xmm0 30639; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30640; SSE3-NEXT: movdqa %xmm0, %xmm2 30641; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 30642; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30643; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 30644; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30645; 
SSE3-NEXT: pand %xmm3, %xmm1 30646; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30647; SSE3-NEXT: por %xmm1, %xmm0 30648; SSE3-NEXT: retq 30649; 30650; SSSE3-LABEL: ugt_38_v2i64: 30651; SSSE3: # %bb.0: 30652; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30653; SSSE3-NEXT: movdqa %xmm0, %xmm2 30654; SSSE3-NEXT: pand %xmm1, %xmm2 30655; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30656; SSSE3-NEXT: movdqa %xmm3, %xmm4 30657; SSSE3-NEXT: pshufb %xmm2, %xmm4 30658; SSSE3-NEXT: psrlw $4, %xmm0 30659; SSSE3-NEXT: pand %xmm1, %xmm0 30660; SSSE3-NEXT: pshufb %xmm0, %xmm3 30661; SSSE3-NEXT: paddb %xmm4, %xmm3 30662; SSSE3-NEXT: pxor %xmm0, %xmm0 30663; SSSE3-NEXT: psadbw %xmm3, %xmm0 30664; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 30665; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30666; SSSE3-NEXT: movdqa %xmm0, %xmm2 30667; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 30668; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30669; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 30670; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30671; SSSE3-NEXT: pand %xmm3, %xmm1 30672; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30673; SSSE3-NEXT: por %xmm1, %xmm0 30674; SSSE3-NEXT: retq 30675; 30676; SSE41-LABEL: ugt_38_v2i64: 30677; SSE41: # %bb.0: 30678; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30679; SSE41-NEXT: movdqa %xmm0, %xmm2 30680; SSE41-NEXT: pand %xmm1, %xmm2 30681; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30682; SSE41-NEXT: movdqa %xmm3, %xmm4 30683; SSE41-NEXT: pshufb %xmm2, %xmm4 30684; SSE41-NEXT: psrlw $4, %xmm0 30685; SSE41-NEXT: pand %xmm1, %xmm0 30686; SSE41-NEXT: pshufb %xmm0, %xmm3 30687; SSE41-NEXT: paddb %xmm4, %xmm3 30688; SSE41-NEXT: pxor %xmm0, %xmm0 30689; SSE41-NEXT: psadbw %xmm3, %xmm0 30690; SSE41-NEXT: por {{.*}}(%rip), %xmm0 30691; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] 30692; SSE41-NEXT: movdqa %xmm0, 
%xmm2 30693; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 30694; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30695; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 30696; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30697; SSE41-NEXT: pand %xmm3, %xmm1 30698; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30699; SSE41-NEXT: por %xmm1, %xmm0 30700; SSE41-NEXT: retq 30701; 30702; AVX1-LABEL: ugt_38_v2i64: 30703; AVX1: # %bb.0: 30704; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30705; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 30706; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30707; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30708; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 30709; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 30710; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30711; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30712; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 30713; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30714; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30715; AVX1-NEXT: retq 30716; 30717; AVX2-LABEL: ugt_38_v2i64: 30718; AVX2: # %bb.0: 30719; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30720; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 30721; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30722; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30723; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 30724; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 30725; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30726; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30727; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 30728; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30729; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30730; AVX2-NEXT: retq 30731; 30732; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64: 30733; AVX512VPOPCNTDQ: # %bb.0: 30734; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30735; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 30736; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30737; AVX512VPOPCNTDQ-NEXT: vzeroupper 30738; 
AVX512VPOPCNTDQ-NEXT: retq 30739; 30740; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64: 30741; AVX512VPOPCNTDQVL: # %bb.0: 30742; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 30743; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 30744; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30745; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30746; AVX512VPOPCNTDQVL-NEXT: retq 30747; 30748; BITALG_NOVLX-LABEL: ugt_38_v2i64: 30749; BITALG_NOVLX: # %bb.0: 30750; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30751; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 30752; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 30753; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30754; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 30755; BITALG_NOVLX-NEXT: vzeroupper 30756; BITALG_NOVLX-NEXT: retq 30757; 30758; BITALG-LABEL: ugt_38_v2i64: 30759; BITALG: # %bb.0: 30760; BITALG-NEXT: vpopcntb %xmm0, %xmm0 30761; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 30762; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30763; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 30764; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30765; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30766; BITALG-NEXT: retq 30767 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 30768 %3 = icmp ugt <2 x i64> %2, <i64 38, i64 38> 30769 %4 = sext <2 x i1> %3 to <2 x i64> 30770 ret <2 x i64> %4 30771} 30772 30773define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { 30774; SSE2-LABEL: ult_39_v2i64: 30775; SSE2: # %bb.0: 30776; SSE2-NEXT: movdqa %xmm0, %xmm1 30777; SSE2-NEXT: psrlw $1, %xmm1 30778; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30779; SSE2-NEXT: psubb %xmm1, %xmm0 30780; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30781; SSE2-NEXT: movdqa %xmm0, %xmm2 30782; SSE2-NEXT: pand %xmm1, %xmm2 30783; SSE2-NEXT: psrlw $2, %xmm0 30784; SSE2-NEXT: pand %xmm1, %xmm0 30785; SSE2-NEXT: paddb %xmm2, %xmm0 30786; SSE2-NEXT: movdqa %xmm0, %xmm1 30787; SSE2-NEXT: psrlw $4, 
%xmm1 30788; SSE2-NEXT: paddb %xmm0, %xmm1 30789; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30790; SSE2-NEXT: pxor %xmm0, %xmm0 30791; SSE2-NEXT: psadbw %xmm1, %xmm0 30792; SSE2-NEXT: por {{.*}}(%rip), %xmm0 30793; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 30794; SSE2-NEXT: movdqa %xmm1, %xmm2 30795; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 30796; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30797; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 30798; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30799; SSE2-NEXT: pand %xmm3, %xmm1 30800; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30801; SSE2-NEXT: por %xmm1, %xmm0 30802; SSE2-NEXT: retq 30803; 30804; SSE3-LABEL: ult_39_v2i64: 30805; SSE3: # %bb.0: 30806; SSE3-NEXT: movdqa %xmm0, %xmm1 30807; SSE3-NEXT: psrlw $1, %xmm1 30808; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30809; SSE3-NEXT: psubb %xmm1, %xmm0 30810; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30811; SSE3-NEXT: movdqa %xmm0, %xmm2 30812; SSE3-NEXT: pand %xmm1, %xmm2 30813; SSE3-NEXT: psrlw $2, %xmm0 30814; SSE3-NEXT: pand %xmm1, %xmm0 30815; SSE3-NEXT: paddb %xmm2, %xmm0 30816; SSE3-NEXT: movdqa %xmm0, %xmm1 30817; SSE3-NEXT: psrlw $4, %xmm1 30818; SSE3-NEXT: paddb %xmm0, %xmm1 30819; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30820; SSE3-NEXT: pxor %xmm0, %xmm0 30821; SSE3-NEXT: psadbw %xmm1, %xmm0 30822; SSE3-NEXT: por {{.*}}(%rip), %xmm0 30823; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 30824; SSE3-NEXT: movdqa %xmm1, %xmm2 30825; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 30826; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30827; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 30828; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30829; SSE3-NEXT: pand %xmm3, %xmm1 30830; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30831; SSE3-NEXT: por %xmm1, %xmm0 30832; SSE3-NEXT: retq 30833; 30834; SSSE3-LABEL: ult_39_v2i64: 30835; SSSE3: # %bb.0: 30836; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30837; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 30838; SSSE3-NEXT: pand %xmm1, %xmm2 30839; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30840; SSSE3-NEXT: movdqa %xmm3, %xmm4 30841; SSSE3-NEXT: pshufb %xmm2, %xmm4 30842; SSSE3-NEXT: psrlw $4, %xmm0 30843; SSSE3-NEXT: pand %xmm1, %xmm0 30844; SSSE3-NEXT: pshufb %xmm0, %xmm3 30845; SSSE3-NEXT: paddb %xmm4, %xmm3 30846; SSSE3-NEXT: pxor %xmm0, %xmm0 30847; SSSE3-NEXT: psadbw %xmm3, %xmm0 30848; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 30849; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 30850; SSSE3-NEXT: movdqa %xmm1, %xmm2 30851; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 30852; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30853; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 30854; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30855; SSSE3-NEXT: pand %xmm3, %xmm1 30856; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30857; SSSE3-NEXT: por %xmm1, %xmm0 30858; SSSE3-NEXT: retq 30859; 30860; SSE41-LABEL: ult_39_v2i64: 30861; SSE41: # %bb.0: 30862; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30863; SSE41-NEXT: movdqa %xmm0, %xmm2 30864; SSE41-NEXT: pand %xmm1, %xmm2 30865; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30866; SSE41-NEXT: movdqa %xmm3, %xmm4 30867; SSE41-NEXT: pshufb %xmm2, %xmm4 30868; SSE41-NEXT: psrlw $4, %xmm0 30869; SSE41-NEXT: pand %xmm1, %xmm0 30870; SSE41-NEXT: pshufb %xmm0, %xmm3 30871; SSE41-NEXT: paddb %xmm4, %xmm3 30872; SSE41-NEXT: pxor %xmm0, %xmm0 30873; SSE41-NEXT: psadbw %xmm3, %xmm0 30874; SSE41-NEXT: por {{.*}}(%rip), %xmm0 30875; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 30876; SSE41-NEXT: movdqa %xmm1, %xmm2 30877; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 30878; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30879; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 30880; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30881; SSE41-NEXT: pand %xmm3, %xmm1 30882; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30883; 
SSE41-NEXT: por %xmm1, %xmm0 30884; SSE41-NEXT: retq 30885; 30886; AVX1-LABEL: ult_39_v2i64: 30887; AVX1: # %bb.0: 30888; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30889; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 30890; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30891; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30892; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 30893; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 30894; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30895; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30896; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 30897; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30898; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] 30899; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30900; AVX1-NEXT: retq 30901; 30902; AVX2-LABEL: ult_39_v2i64: 30903; AVX2: # %bb.0: 30904; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 30905; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 30906; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 30907; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 30908; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 30909; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 30910; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 30911; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 30912; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 30913; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30914; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] 30915; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30916; AVX2-NEXT: retq 30917; 30918; AVX512VPOPCNTDQ-LABEL: ult_39_v2i64: 30919; AVX512VPOPCNTDQ: # %bb.0: 30920; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30921; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 30922; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] 30923; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30924; AVX512VPOPCNTDQ-NEXT: vzeroupper 30925; AVX512VPOPCNTDQ-NEXT: retq 30926; 30927; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64: 30928; AVX512VPOPCNTDQVL: # %bb.0: 30929; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 30930; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 30931; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30932; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30933; AVX512VPOPCNTDQVL-NEXT: retq 30934; 30935; BITALG_NOVLX-LABEL: ult_39_v2i64: 30936; BITALG_NOVLX: # %bb.0: 30937; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 30938; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 30939; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 30940; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30941; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] 30942; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 30943; BITALG_NOVLX-NEXT: vzeroupper 30944; BITALG_NOVLX-NEXT: retq 30945; 30946; BITALG-LABEL: ult_39_v2i64: 30947; BITALG: # %bb.0: 30948; BITALG-NEXT: vpopcntb %xmm0, %xmm0 30949; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 30950; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 30951; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 30952; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 30953; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 30954; BITALG-NEXT: retq 30955 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 30956 %3 = icmp ult <2 x i64> %2, <i64 39, i64 39> 30957 %4 = sext <2 x i1> %3 to <2 x i64> 30958 ret <2 x i64> %4 30959} 30960 30961define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { 30962; SSE2-LABEL: ugt_39_v2i64: 30963; SSE2: # %bb.0: 30964; SSE2-NEXT: movdqa %xmm0, %xmm1 30965; SSE2-NEXT: psrlw $1, %xmm1 30966; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30967; SSE2-NEXT: psubb %xmm1, %xmm0 30968; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30969; SSE2-NEXT: movdqa %xmm0, %xmm2 30970; SSE2-NEXT: pand %xmm1, %xmm2 30971; SSE2-NEXT: psrlw $2, %xmm0 30972; SSE2-NEXT: pand %xmm1, %xmm0 30973; SSE2-NEXT: paddb %xmm2, %xmm0 30974; SSE2-NEXT: movdqa %xmm0, %xmm1 30975; SSE2-NEXT: psrlw $4, %xmm1 30976; SSE2-NEXT: paddb %xmm0, %xmm1 30977; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 30978; SSE2-NEXT: pxor %xmm0, 
%xmm0 30979; SSE2-NEXT: psadbw %xmm1, %xmm0 30980; SSE2-NEXT: por {{.*}}(%rip), %xmm0 30981; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 30982; SSE2-NEXT: movdqa %xmm0, %xmm2 30983; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 30984; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 30985; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 30986; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 30987; SSE2-NEXT: pand %xmm3, %xmm1 30988; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 30989; SSE2-NEXT: por %xmm1, %xmm0 30990; SSE2-NEXT: retq 30991; 30992; SSE3-LABEL: ugt_39_v2i64: 30993; SSE3: # %bb.0: 30994; SSE3-NEXT: movdqa %xmm0, %xmm1 30995; SSE3-NEXT: psrlw $1, %xmm1 30996; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 30997; SSE3-NEXT: psubb %xmm1, %xmm0 30998; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 30999; SSE3-NEXT: movdqa %xmm0, %xmm2 31000; SSE3-NEXT: pand %xmm1, %xmm2 31001; SSE3-NEXT: psrlw $2, %xmm0 31002; SSE3-NEXT: pand %xmm1, %xmm0 31003; SSE3-NEXT: paddb %xmm2, %xmm0 31004; SSE3-NEXT: movdqa %xmm0, %xmm1 31005; SSE3-NEXT: psrlw $4, %xmm1 31006; SSE3-NEXT: paddb %xmm0, %xmm1 31007; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31008; SSE3-NEXT: pxor %xmm0, %xmm0 31009; SSE3-NEXT: psadbw %xmm1, %xmm0 31010; SSE3-NEXT: por {{.*}}(%rip), %xmm0 31011; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 31012; SSE3-NEXT: movdqa %xmm0, %xmm2 31013; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 31014; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31015; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 31016; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31017; SSE3-NEXT: pand %xmm3, %xmm1 31018; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31019; SSE3-NEXT: por %xmm1, %xmm0 31020; SSE3-NEXT: retq 31021; 31022; SSSE3-LABEL: ugt_39_v2i64: 31023; SSSE3: # %bb.0: 31024; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31025; SSSE3-NEXT: movdqa %xmm0, %xmm2 31026; SSSE3-NEXT: pand %xmm1, %xmm2 31027; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31028; SSSE3-NEXT: movdqa %xmm3, %xmm4 31029; SSSE3-NEXT: pshufb %xmm2, %xmm4 31030; SSSE3-NEXT: psrlw $4, %xmm0 31031; SSSE3-NEXT: pand %xmm1, %xmm0 31032; SSSE3-NEXT: pshufb %xmm0, %xmm3 31033; SSSE3-NEXT: paddb %xmm4, %xmm3 31034; SSSE3-NEXT: pxor %xmm0, %xmm0 31035; SSSE3-NEXT: psadbw %xmm3, %xmm0 31036; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 31037; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 31038; SSSE3-NEXT: movdqa %xmm0, %xmm2 31039; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 31040; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31041; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 31042; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31043; SSSE3-NEXT: pand %xmm3, %xmm1 31044; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31045; SSSE3-NEXT: por %xmm1, %xmm0 31046; SSSE3-NEXT: retq 31047; 31048; SSE41-LABEL: ugt_39_v2i64: 31049; SSE41: # %bb.0: 31050; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31051; SSE41-NEXT: movdqa %xmm0, %xmm2 31052; SSE41-NEXT: pand %xmm1, %xmm2 31053; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31054; SSE41-NEXT: movdqa %xmm3, %xmm4 31055; SSE41-NEXT: pshufb %xmm2, %xmm4 31056; SSE41-NEXT: psrlw $4, %xmm0 31057; SSE41-NEXT: pand %xmm1, %xmm0 31058; SSE41-NEXT: pshufb %xmm0, %xmm3 31059; SSE41-NEXT: paddb %xmm4, %xmm3 31060; SSE41-NEXT: pxor %xmm0, %xmm0 31061; SSE41-NEXT: psadbw %xmm3, %xmm0 31062; SSE41-NEXT: por {{.*}}(%rip), %xmm0 31063; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] 31064; SSE41-NEXT: movdqa %xmm0, %xmm2 31065; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 31066; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31067; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 31068; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31069; SSE41-NEXT: pand %xmm3, %xmm1 31070; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31071; SSE41-NEXT: por %xmm1, %xmm0 31072; SSE41-NEXT: retq 31073; 31074; AVX1-LABEL: ugt_39_v2i64: 31075; AVX1: # %bb.0: 
31076; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31077; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 31078; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31079; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31080; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 31081; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 31082; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31083; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31084; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 31085; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31086; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31087; AVX1-NEXT: retq 31088; 31089; AVX2-LABEL: ugt_39_v2i64: 31090; AVX2: # %bb.0: 31091; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31092; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 31093; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31094; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31095; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 31096; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 31097; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31098; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31099; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 31100; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31101; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31102; AVX2-NEXT: retq 31103; 31104; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64: 31105; AVX512VPOPCNTDQ: # %bb.0: 31106; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31107; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 31108; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31109; AVX512VPOPCNTDQ-NEXT: vzeroupper 31110; AVX512VPOPCNTDQ-NEXT: retq 31111; 31112; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64: 31113; AVX512VPOPCNTDQVL: # %bb.0: 31114; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 31115; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 31116; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31117; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31118; AVX512VPOPCNTDQVL-NEXT: retq 31119; 31120; 
BITALG_NOVLX-LABEL: ugt_39_v2i64: 31121; BITALG_NOVLX: # %bb.0: 31122; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31123; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 31124; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 31125; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31126; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31127; BITALG_NOVLX-NEXT: vzeroupper 31128; BITALG_NOVLX-NEXT: retq 31129; 31130; BITALG-LABEL: ugt_39_v2i64: 31131; BITALG: # %bb.0: 31132; BITALG-NEXT: vpopcntb %xmm0, %xmm0 31133; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 31134; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31135; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 31136; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31137; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31138; BITALG-NEXT: retq 31139 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 31140 %3 = icmp ugt <2 x i64> %2, <i64 39, i64 39> 31141 %4 = sext <2 x i1> %3 to <2 x i64> 31142 ret <2 x i64> %4 31143} 31144 31145define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { 31146; SSE2-LABEL: ult_40_v2i64: 31147; SSE2: # %bb.0: 31148; SSE2-NEXT: movdqa %xmm0, %xmm1 31149; SSE2-NEXT: psrlw $1, %xmm1 31150; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31151; SSE2-NEXT: psubb %xmm1, %xmm0 31152; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31153; SSE2-NEXT: movdqa %xmm0, %xmm2 31154; SSE2-NEXT: pand %xmm1, %xmm2 31155; SSE2-NEXT: psrlw $2, %xmm0 31156; SSE2-NEXT: pand %xmm1, %xmm0 31157; SSE2-NEXT: paddb %xmm2, %xmm0 31158; SSE2-NEXT: movdqa %xmm0, %xmm1 31159; SSE2-NEXT: psrlw $4, %xmm1 31160; SSE2-NEXT: paddb %xmm0, %xmm1 31161; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31162; SSE2-NEXT: pxor %xmm0, %xmm0 31163; SSE2-NEXT: psadbw %xmm1, %xmm0 31164; SSE2-NEXT: por {{.*}}(%rip), %xmm0 31165; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] 31166; SSE2-NEXT: movdqa %xmm1, %xmm2 31167; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 31168; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31169; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 31170; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31171; SSE2-NEXT: pand %xmm3, %xmm1 31172; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31173; SSE2-NEXT: por %xmm1, %xmm0 31174; SSE2-NEXT: retq 31175; 31176; SSE3-LABEL: ult_40_v2i64: 31177; SSE3: # %bb.0: 31178; SSE3-NEXT: movdqa %xmm0, %xmm1 31179; SSE3-NEXT: psrlw $1, %xmm1 31180; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31181; SSE3-NEXT: psubb %xmm1, %xmm0 31182; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31183; SSE3-NEXT: movdqa %xmm0, %xmm2 31184; SSE3-NEXT: pand %xmm1, %xmm2 31185; SSE3-NEXT: psrlw $2, %xmm0 31186; SSE3-NEXT: pand %xmm1, %xmm0 31187; SSE3-NEXT: paddb %xmm2, %xmm0 31188; SSE3-NEXT: movdqa %xmm0, %xmm1 31189; SSE3-NEXT: psrlw $4, %xmm1 31190; SSE3-NEXT: paddb %xmm0, %xmm1 31191; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31192; SSE3-NEXT: pxor %xmm0, %xmm0 31193; SSE3-NEXT: psadbw %xmm1, %xmm0 31194; SSE3-NEXT: por {{.*}}(%rip), %xmm0 31195; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] 31196; SSE3-NEXT: movdqa %xmm1, %xmm2 31197; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 31198; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31199; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 31200; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31201; SSE3-NEXT: pand %xmm3, %xmm1 31202; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31203; SSE3-NEXT: por %xmm1, %xmm0 31204; SSE3-NEXT: retq 31205; 31206; SSSE3-LABEL: ult_40_v2i64: 31207; SSSE3: # %bb.0: 31208; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31209; SSSE3-NEXT: movdqa %xmm0, %xmm2 31210; SSSE3-NEXT: pand %xmm1, %xmm2 31211; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31212; SSSE3-NEXT: movdqa %xmm3, %xmm4 31213; SSSE3-NEXT: pshufb %xmm2, %xmm4 31214; SSSE3-NEXT: psrlw $4, %xmm0 31215; SSSE3-NEXT: pand %xmm1, %xmm0 31216; SSSE3-NEXT: pshufb %xmm0, %xmm3 31217; SSSE3-NEXT: paddb %xmm4, %xmm3 31218; SSSE3-NEXT: pxor %xmm0, 
%xmm0 31219; SSSE3-NEXT: psadbw %xmm3, %xmm0 31220; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 31221; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] 31222; SSSE3-NEXT: movdqa %xmm1, %xmm2 31223; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 31224; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31225; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 31226; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31227; SSSE3-NEXT: pand %xmm3, %xmm1 31228; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31229; SSSE3-NEXT: por %xmm1, %xmm0 31230; SSSE3-NEXT: retq 31231; 31232; SSE41-LABEL: ult_40_v2i64: 31233; SSE41: # %bb.0: 31234; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31235; SSE41-NEXT: movdqa %xmm0, %xmm2 31236; SSE41-NEXT: pand %xmm1, %xmm2 31237; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31238; SSE41-NEXT: movdqa %xmm3, %xmm4 31239; SSE41-NEXT: pshufb %xmm2, %xmm4 31240; SSE41-NEXT: psrlw $4, %xmm0 31241; SSE41-NEXT: pand %xmm1, %xmm0 31242; SSE41-NEXT: pshufb %xmm0, %xmm3 31243; SSE41-NEXT: paddb %xmm4, %xmm3 31244; SSE41-NEXT: pxor %xmm0, %xmm0 31245; SSE41-NEXT: psadbw %xmm3, %xmm0 31246; SSE41-NEXT: por {{.*}}(%rip), %xmm0 31247; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] 31248; SSE41-NEXT: movdqa %xmm1, %xmm2 31249; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 31250; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31251; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 31252; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31253; SSE41-NEXT: pand %xmm3, %xmm1 31254; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31255; SSE41-NEXT: por %xmm1, %xmm0 31256; SSE41-NEXT: retq 31257; 31258; AVX1-LABEL: ult_40_v2i64: 31259; AVX1: # %bb.0: 31260; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31261; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 31262; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31263; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31264; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 31265; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 31266; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31267; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31268; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 31269; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31270; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] 31271; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31272; AVX1-NEXT: retq 31273; 31274; AVX2-LABEL: ult_40_v2i64: 31275; AVX2: # %bb.0: 31276; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31277; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 31278; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31279; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31280; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 31281; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 31282; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31283; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31284; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 31285; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31286; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] 31287; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31288; AVX2-NEXT: retq 31289; 31290; AVX512VPOPCNTDQ-LABEL: ult_40_v2i64: 31291; AVX512VPOPCNTDQ: # %bb.0: 31292; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31293; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 31294; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] 31295; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31296; AVX512VPOPCNTDQ-NEXT: vzeroupper 31297; AVX512VPOPCNTDQ-NEXT: retq 31298; 31299; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64: 31300; AVX512VPOPCNTDQVL: # %bb.0: 31301; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 31302; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 31303; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31304; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31305; AVX512VPOPCNTDQVL-NEXT: retq 31306; 31307; BITALG_NOVLX-LABEL: ult_40_v2i64: 31308; BITALG_NOVLX: # %bb.0: 31309; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31310; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 31311; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 31312; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31313; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] 31314; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31315; BITALG_NOVLX-NEXT: vzeroupper 31316; BITALG_NOVLX-NEXT: retq 31317; 31318; BITALG-LABEL: ult_40_v2i64: 31319; BITALG: # %bb.0: 31320; BITALG-NEXT: vpopcntb %xmm0, %xmm0 31321; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 31322; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31323; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 31324; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31325; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31326; BITALG-NEXT: retq 31327 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 31328 %3 = icmp ult <2 x i64> %2, <i64 40, i64 40> 31329 %4 = sext <2 x i1> %3 to <2 x i64> 31330 ret <2 x i64> %4 31331} 31332 31333define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { 31334; SSE2-LABEL: ugt_40_v2i64: 31335; SSE2: # %bb.0: 31336; SSE2-NEXT: movdqa %xmm0, %xmm1 31337; SSE2-NEXT: psrlw $1, %xmm1 31338; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31339; SSE2-NEXT: psubb %xmm1, %xmm0 31340; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31341; SSE2-NEXT: movdqa %xmm0, %xmm2 31342; SSE2-NEXT: pand %xmm1, %xmm2 31343; SSE2-NEXT: psrlw $2, %xmm0 31344; SSE2-NEXT: pand %xmm1, %xmm0 31345; SSE2-NEXT: paddb %xmm2, %xmm0 31346; SSE2-NEXT: movdqa %xmm0, %xmm1 31347; SSE2-NEXT: psrlw $4, %xmm1 31348; SSE2-NEXT: paddb %xmm0, %xmm1 31349; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31350; SSE2-NEXT: pxor %xmm0, %xmm0 31351; SSE2-NEXT: psadbw %xmm1, %xmm0 31352; SSE2-NEXT: por {{.*}}(%rip), %xmm0 31353; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] 31354; SSE2-NEXT: movdqa %xmm0, %xmm2 31355; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 31356; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31357; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 31358; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31359; SSE2-NEXT: pand 
%xmm3, %xmm1 31360; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31361; SSE2-NEXT: por %xmm1, %xmm0 31362; SSE2-NEXT: retq 31363; 31364; SSE3-LABEL: ugt_40_v2i64: 31365; SSE3: # %bb.0: 31366; SSE3-NEXT: movdqa %xmm0, %xmm1 31367; SSE3-NEXT: psrlw $1, %xmm1 31368; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31369; SSE3-NEXT: psubb %xmm1, %xmm0 31370; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31371; SSE3-NEXT: movdqa %xmm0, %xmm2 31372; SSE3-NEXT: pand %xmm1, %xmm2 31373; SSE3-NEXT: psrlw $2, %xmm0 31374; SSE3-NEXT: pand %xmm1, %xmm0 31375; SSE3-NEXT: paddb %xmm2, %xmm0 31376; SSE3-NEXT: movdqa %xmm0, %xmm1 31377; SSE3-NEXT: psrlw $4, %xmm1 31378; SSE3-NEXT: paddb %xmm0, %xmm1 31379; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31380; SSE3-NEXT: pxor %xmm0, %xmm0 31381; SSE3-NEXT: psadbw %xmm1, %xmm0 31382; SSE3-NEXT: por {{.*}}(%rip), %xmm0 31383; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] 31384; SSE3-NEXT: movdqa %xmm0, %xmm2 31385; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 31386; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31387; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 31388; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31389; SSE3-NEXT: pand %xmm3, %xmm1 31390; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31391; SSE3-NEXT: por %xmm1, %xmm0 31392; SSE3-NEXT: retq 31393; 31394; SSSE3-LABEL: ugt_40_v2i64: 31395; SSSE3: # %bb.0: 31396; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31397; SSSE3-NEXT: movdqa %xmm0, %xmm2 31398; SSSE3-NEXT: pand %xmm1, %xmm2 31399; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31400; SSSE3-NEXT: movdqa %xmm3, %xmm4 31401; SSSE3-NEXT: pshufb %xmm2, %xmm4 31402; SSSE3-NEXT: psrlw $4, %xmm0 31403; SSSE3-NEXT: pand %xmm1, %xmm0 31404; SSSE3-NEXT: pshufb %xmm0, %xmm3 31405; SSSE3-NEXT: paddb %xmm4, %xmm3 31406; SSSE3-NEXT: pxor %xmm0, %xmm0 31407; SSSE3-NEXT: psadbw %xmm3, %xmm0 31408; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 31409; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483688,2147483688] 31410; SSSE3-NEXT: movdqa %xmm0, %xmm2 31411; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 31412; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31413; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 31414; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31415; SSSE3-NEXT: pand %xmm3, %xmm1 31416; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31417; SSSE3-NEXT: por %xmm1, %xmm0 31418; SSSE3-NEXT: retq 31419; 31420; SSE41-LABEL: ugt_40_v2i64: 31421; SSE41: # %bb.0: 31422; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31423; SSE41-NEXT: movdqa %xmm0, %xmm2 31424; SSE41-NEXT: pand %xmm1, %xmm2 31425; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31426; SSE41-NEXT: movdqa %xmm3, %xmm4 31427; SSE41-NEXT: pshufb %xmm2, %xmm4 31428; SSE41-NEXT: psrlw $4, %xmm0 31429; SSE41-NEXT: pand %xmm1, %xmm0 31430; SSE41-NEXT: pshufb %xmm0, %xmm3 31431; SSE41-NEXT: paddb %xmm4, %xmm3 31432; SSE41-NEXT: pxor %xmm0, %xmm0 31433; SSE41-NEXT: psadbw %xmm3, %xmm0 31434; SSE41-NEXT: por {{.*}}(%rip), %xmm0 31435; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] 31436; SSE41-NEXT: movdqa %xmm0, %xmm2 31437; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 31438; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31439; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 31440; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31441; SSE41-NEXT: pand %xmm3, %xmm1 31442; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31443; SSE41-NEXT: por %xmm1, %xmm0 31444; SSE41-NEXT: retq 31445; 31446; AVX1-LABEL: ugt_40_v2i64: 31447; AVX1: # %bb.0: 31448; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31449; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 31450; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31451; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31452; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 31453; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 31454; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31455; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 31456; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 31457; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31458; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31459; AVX1-NEXT: retq 31460; 31461; AVX2-LABEL: ugt_40_v2i64: 31462; AVX2: # %bb.0: 31463; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31464; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 31465; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31466; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31467; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 31468; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 31469; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31470; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31471; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 31472; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31473; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31474; AVX2-NEXT: retq 31475; 31476; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64: 31477; AVX512VPOPCNTDQ: # %bb.0: 31478; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31479; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 31480; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31481; AVX512VPOPCNTDQ-NEXT: vzeroupper 31482; AVX512VPOPCNTDQ-NEXT: retq 31483; 31484; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64: 31485; AVX512VPOPCNTDQVL: # %bb.0: 31486; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 31487; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 31488; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31489; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31490; AVX512VPOPCNTDQVL-NEXT: retq 31491; 31492; BITALG_NOVLX-LABEL: ugt_40_v2i64: 31493; BITALG_NOVLX: # %bb.0: 31494; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31495; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 31496; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 31497; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31498; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31499; BITALG_NOVLX-NEXT: vzeroupper 31500; BITALG_NOVLX-NEXT: retq 
31501; 31502; BITALG-LABEL: ugt_40_v2i64: 31503; BITALG: # %bb.0: 31504; BITALG-NEXT: vpopcntb %xmm0, %xmm0 31505; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 31506; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31507; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 31508; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31509; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31510; BITALG-NEXT: retq 31511 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 31512 %3 = icmp ugt <2 x i64> %2, <i64 40, i64 40> 31513 %4 = sext <2 x i1> %3 to <2 x i64> 31514 ret <2 x i64> %4 31515} 31516 31517define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { 31518; SSE2-LABEL: ult_41_v2i64: 31519; SSE2: # %bb.0: 31520; SSE2-NEXT: movdqa %xmm0, %xmm1 31521; SSE2-NEXT: psrlw $1, %xmm1 31522; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31523; SSE2-NEXT: psubb %xmm1, %xmm0 31524; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31525; SSE2-NEXT: movdqa %xmm0, %xmm2 31526; SSE2-NEXT: pand %xmm1, %xmm2 31527; SSE2-NEXT: psrlw $2, %xmm0 31528; SSE2-NEXT: pand %xmm1, %xmm0 31529; SSE2-NEXT: paddb %xmm2, %xmm0 31530; SSE2-NEXT: movdqa %xmm0, %xmm1 31531; SSE2-NEXT: psrlw $4, %xmm1 31532; SSE2-NEXT: paddb %xmm0, %xmm1 31533; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31534; SSE2-NEXT: pxor %xmm0, %xmm0 31535; SSE2-NEXT: psadbw %xmm1, %xmm0 31536; SSE2-NEXT: por {{.*}}(%rip), %xmm0 31537; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31538; SSE2-NEXT: movdqa %xmm1, %xmm2 31539; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 31540; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31541; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 31542; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31543; SSE2-NEXT: pand %xmm3, %xmm1 31544; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31545; SSE2-NEXT: por %xmm1, %xmm0 31546; SSE2-NEXT: retq 31547; 31548; SSE3-LABEL: ult_41_v2i64: 31549; SSE3: # %bb.0: 31550; SSE3-NEXT: movdqa %xmm0, %xmm1 31551; SSE3-NEXT: psrlw $1, %xmm1 31552; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31553; 
SSE3-NEXT: psubb %xmm1, %xmm0 31554; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31555; SSE3-NEXT: movdqa %xmm0, %xmm2 31556; SSE3-NEXT: pand %xmm1, %xmm2 31557; SSE3-NEXT: psrlw $2, %xmm0 31558; SSE3-NEXT: pand %xmm1, %xmm0 31559; SSE3-NEXT: paddb %xmm2, %xmm0 31560; SSE3-NEXT: movdqa %xmm0, %xmm1 31561; SSE3-NEXT: psrlw $4, %xmm1 31562; SSE3-NEXT: paddb %xmm0, %xmm1 31563; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31564; SSE3-NEXT: pxor %xmm0, %xmm0 31565; SSE3-NEXT: psadbw %xmm1, %xmm0 31566; SSE3-NEXT: por {{.*}}(%rip), %xmm0 31567; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31568; SSE3-NEXT: movdqa %xmm1, %xmm2 31569; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 31570; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31571; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 31572; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31573; SSE3-NEXT: pand %xmm3, %xmm1 31574; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31575; SSE3-NEXT: por %xmm1, %xmm0 31576; SSE3-NEXT: retq 31577; 31578; SSSE3-LABEL: ult_41_v2i64: 31579; SSSE3: # %bb.0: 31580; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31581; SSSE3-NEXT: movdqa %xmm0, %xmm2 31582; SSSE3-NEXT: pand %xmm1, %xmm2 31583; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31584; SSSE3-NEXT: movdqa %xmm3, %xmm4 31585; SSSE3-NEXT: pshufb %xmm2, %xmm4 31586; SSSE3-NEXT: psrlw $4, %xmm0 31587; SSSE3-NEXT: pand %xmm1, %xmm0 31588; SSSE3-NEXT: pshufb %xmm0, %xmm3 31589; SSSE3-NEXT: paddb %xmm4, %xmm3 31590; SSSE3-NEXT: pxor %xmm0, %xmm0 31591; SSSE3-NEXT: psadbw %xmm3, %xmm0 31592; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 31593; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31594; SSSE3-NEXT: movdqa %xmm1, %xmm2 31595; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 31596; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31597; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 31598; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31599; SSSE3-NEXT: pand %xmm3, %xmm1 
31600; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31601; SSSE3-NEXT: por %xmm1, %xmm0 31602; SSSE3-NEXT: retq 31603; 31604; SSE41-LABEL: ult_41_v2i64: 31605; SSE41: # %bb.0: 31606; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31607; SSE41-NEXT: movdqa %xmm0, %xmm2 31608; SSE41-NEXT: pand %xmm1, %xmm2 31609; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31610; SSE41-NEXT: movdqa %xmm3, %xmm4 31611; SSE41-NEXT: pshufb %xmm2, %xmm4 31612; SSE41-NEXT: psrlw $4, %xmm0 31613; SSE41-NEXT: pand %xmm1, %xmm0 31614; SSE41-NEXT: pshufb %xmm0, %xmm3 31615; SSE41-NEXT: paddb %xmm4, %xmm3 31616; SSE41-NEXT: pxor %xmm0, %xmm0 31617; SSE41-NEXT: psadbw %xmm3, %xmm0 31618; SSE41-NEXT: por {{.*}}(%rip), %xmm0 31619; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31620; SSE41-NEXT: movdqa %xmm1, %xmm2 31621; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 31622; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31623; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 31624; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31625; SSE41-NEXT: pand %xmm3, %xmm1 31626; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31627; SSE41-NEXT: por %xmm1, %xmm0 31628; SSE41-NEXT: retq 31629; 31630; AVX1-LABEL: ult_41_v2i64: 31631; AVX1: # %bb.0: 31632; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31633; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 31634; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31635; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31636; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 31637; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 31638; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31639; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31640; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 31641; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31642; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] 31643; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31644; AVX1-NEXT: retq 31645; 31646; AVX2-LABEL: ult_41_v2i64: 31647; AVX2: # %bb.0: 31648; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31649; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 31650; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31651; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31652; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 31653; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 31654; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31655; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31656; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 31657; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31658; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] 31659; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31660; AVX2-NEXT: retq 31661; 31662; AVX512VPOPCNTDQ-LABEL: ult_41_v2i64: 31663; AVX512VPOPCNTDQ: # %bb.0: 31664; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31665; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 31666; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] 31667; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31668; AVX512VPOPCNTDQ-NEXT: vzeroupper 31669; AVX512VPOPCNTDQ-NEXT: retq 31670; 31671; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64: 31672; AVX512VPOPCNTDQVL: # %bb.0: 31673; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 31674; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 31675; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31676; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31677; AVX512VPOPCNTDQVL-NEXT: retq 31678; 31679; BITALG_NOVLX-LABEL: ult_41_v2i64: 31680; BITALG_NOVLX: # %bb.0: 31681; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31682; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 31683; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 31684; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31685; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] 31686; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 31687; BITALG_NOVLX-NEXT: vzeroupper 31688; BITALG_NOVLX-NEXT: retq 31689; 31690; BITALG-LABEL: ult_41_v2i64: 31691; BITALG: # %bb.0: 31692; BITALG-NEXT: vpopcntb %xmm0, %xmm0 31693; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 31694; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31695; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 31696; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31697; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31698; BITALG-NEXT: retq 31699 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 31700 %3 = icmp ult <2 x i64> %2, <i64 41, i64 41> 31701 %4 = sext <2 x i1> %3 to <2 x i64> 31702 ret <2 x i64> %4 31703} 31704 31705define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { 31706; SSE2-LABEL: ugt_41_v2i64: 31707; SSE2: # %bb.0: 31708; SSE2-NEXT: movdqa %xmm0, %xmm1 31709; SSE2-NEXT: psrlw $1, %xmm1 31710; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31711; SSE2-NEXT: psubb %xmm1, %xmm0 31712; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31713; SSE2-NEXT: movdqa %xmm0, %xmm2 31714; SSE2-NEXT: pand %xmm1, %xmm2 31715; SSE2-NEXT: psrlw $2, %xmm0 31716; SSE2-NEXT: pand %xmm1, %xmm0 31717; SSE2-NEXT: paddb %xmm2, %xmm0 31718; SSE2-NEXT: movdqa %xmm0, %xmm1 31719; SSE2-NEXT: psrlw $4, %xmm1 31720; SSE2-NEXT: paddb %xmm0, %xmm1 31721; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31722; SSE2-NEXT: pxor %xmm0, %xmm0 31723; SSE2-NEXT: psadbw %xmm1, %xmm0 31724; SSE2-NEXT: por {{.*}}(%rip), %xmm0 31725; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31726; SSE2-NEXT: movdqa %xmm0, %xmm2 31727; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 31728; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31729; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 31730; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31731; SSE2-NEXT: pand %xmm3, %xmm1 31732; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31733; SSE2-NEXT: por %xmm1, %xmm0 31734; SSE2-NEXT: retq 31735; 31736; SSE3-LABEL: ugt_41_v2i64: 31737; SSE3: # %bb.0: 31738; SSE3-NEXT: movdqa %xmm0, %xmm1 31739; SSE3-NEXT: psrlw $1, %xmm1 31740; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31741; SSE3-NEXT: psubb %xmm1, %xmm0 31742; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31743; SSE3-NEXT: movdqa %xmm0, %xmm2 31744; SSE3-NEXT: pand %xmm1, %xmm2 31745; SSE3-NEXT: psrlw $2, %xmm0 31746; SSE3-NEXT: pand %xmm1, %xmm0 31747; SSE3-NEXT: paddb %xmm2, %xmm0 31748; SSE3-NEXT: movdqa %xmm0, %xmm1 31749; SSE3-NEXT: psrlw $4, %xmm1 31750; SSE3-NEXT: paddb %xmm0, %xmm1 31751; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31752; SSE3-NEXT: pxor %xmm0, %xmm0 31753; SSE3-NEXT: psadbw %xmm1, %xmm0 31754; SSE3-NEXT: por {{.*}}(%rip), %xmm0 31755; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31756; SSE3-NEXT: movdqa %xmm0, %xmm2 31757; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 31758; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31759; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 31760; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31761; SSE3-NEXT: pand %xmm3, %xmm1 31762; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31763; SSE3-NEXT: por %xmm1, %xmm0 31764; SSE3-NEXT: retq 31765; 31766; SSSE3-LABEL: ugt_41_v2i64: 31767; SSSE3: # %bb.0: 31768; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31769; SSSE3-NEXT: movdqa %xmm0, %xmm2 31770; SSSE3-NEXT: pand %xmm1, %xmm2 31771; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31772; SSSE3-NEXT: movdqa %xmm3, %xmm4 31773; SSSE3-NEXT: pshufb %xmm2, %xmm4 31774; SSSE3-NEXT: psrlw $4, %xmm0 31775; SSSE3-NEXT: pand %xmm1, %xmm0 31776; SSSE3-NEXT: pshufb %xmm0, %xmm3 31777; SSSE3-NEXT: paddb %xmm4, %xmm3 31778; SSSE3-NEXT: pxor %xmm0, %xmm0 31779; SSSE3-NEXT: psadbw %xmm3, %xmm0 31780; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 31781; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31782; SSSE3-NEXT: movdqa %xmm0, %xmm2 31783; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 31784; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31785; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 31786; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31787; SSSE3-NEXT: pand %xmm3, %xmm1 31788; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31789; 
SSSE3-NEXT: por %xmm1, %xmm0 31790; SSSE3-NEXT: retq 31791; 31792; SSE41-LABEL: ugt_41_v2i64: 31793; SSE41: # %bb.0: 31794; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31795; SSE41-NEXT: movdqa %xmm0, %xmm2 31796; SSE41-NEXT: pand %xmm1, %xmm2 31797; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31798; SSE41-NEXT: movdqa %xmm3, %xmm4 31799; SSE41-NEXT: pshufb %xmm2, %xmm4 31800; SSE41-NEXT: psrlw $4, %xmm0 31801; SSE41-NEXT: pand %xmm1, %xmm0 31802; SSE41-NEXT: pshufb %xmm0, %xmm3 31803; SSE41-NEXT: paddb %xmm4, %xmm3 31804; SSE41-NEXT: pxor %xmm0, %xmm0 31805; SSE41-NEXT: psadbw %xmm3, %xmm0 31806; SSE41-NEXT: por {{.*}}(%rip), %xmm0 31807; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] 31808; SSE41-NEXT: movdqa %xmm0, %xmm2 31809; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 31810; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31811; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 31812; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31813; SSE41-NEXT: pand %xmm3, %xmm1 31814; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31815; SSE41-NEXT: por %xmm1, %xmm0 31816; SSE41-NEXT: retq 31817; 31818; AVX1-LABEL: ugt_41_v2i64: 31819; AVX1: # %bb.0: 31820; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31821; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 31822; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31823; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31824; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 31825; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 31826; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31827; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31828; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 31829; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31830; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31831; AVX1-NEXT: retq 31832; 31833; AVX2-LABEL: ugt_41_v2i64: 31834; AVX2: # %bb.0: 31835; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31836; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 31837; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31838; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 31839; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 31840; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 31841; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 31842; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 31843; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 31844; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31845; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31846; AVX2-NEXT: retq 31847; 31848; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64: 31849; AVX512VPOPCNTDQ: # %bb.0: 31850; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31851; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 31852; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31853; AVX512VPOPCNTDQ-NEXT: vzeroupper 31854; AVX512VPOPCNTDQ-NEXT: retq 31855; 31856; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64: 31857; AVX512VPOPCNTDQVL: # %bb.0: 31858; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 31859; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 31860; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31861; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31862; AVX512VPOPCNTDQVL-NEXT: retq 31863; 31864; BITALG_NOVLX-LABEL: ugt_41_v2i64: 31865; BITALG_NOVLX: # %bb.0: 31866; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 31867; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 31868; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 31869; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31870; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 31871; BITALG_NOVLX-NEXT: vzeroupper 31872; BITALG_NOVLX-NEXT: retq 31873; 31874; BITALG-LABEL: ugt_41_v2i64: 31875; BITALG: # %bb.0: 31876; BITALG-NEXT: vpopcntb %xmm0, %xmm0 31877; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 31878; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 31879; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 31880; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 31881; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 31882; 
BITALG-NEXT: retq 31883 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 31884 %3 = icmp ugt <2 x i64> %2, <i64 41, i64 41> 31885 %4 = sext <2 x i1> %3 to <2 x i64> 31886 ret <2 x i64> %4 31887} 31888 31889define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { 31890; SSE2-LABEL: ult_42_v2i64: 31891; SSE2: # %bb.0: 31892; SSE2-NEXT: movdqa %xmm0, %xmm1 31893; SSE2-NEXT: psrlw $1, %xmm1 31894; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31895; SSE2-NEXT: psubb %xmm1, %xmm0 31896; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31897; SSE2-NEXT: movdqa %xmm0, %xmm2 31898; SSE2-NEXT: pand %xmm1, %xmm2 31899; SSE2-NEXT: psrlw $2, %xmm0 31900; SSE2-NEXT: pand %xmm1, %xmm0 31901; SSE2-NEXT: paddb %xmm2, %xmm0 31902; SSE2-NEXT: movdqa %xmm0, %xmm1 31903; SSE2-NEXT: psrlw $4, %xmm1 31904; SSE2-NEXT: paddb %xmm0, %xmm1 31905; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 31906; SSE2-NEXT: pxor %xmm0, %xmm0 31907; SSE2-NEXT: psadbw %xmm1, %xmm0 31908; SSE2-NEXT: por {{.*}}(%rip), %xmm0 31909; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 31910; SSE2-NEXT: movdqa %xmm1, %xmm2 31911; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 31912; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31913; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 31914; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31915; SSE2-NEXT: pand %xmm3, %xmm1 31916; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31917; SSE2-NEXT: por %xmm1, %xmm0 31918; SSE2-NEXT: retq 31919; 31920; SSE3-LABEL: ult_42_v2i64: 31921; SSE3: # %bb.0: 31922; SSE3-NEXT: movdqa %xmm0, %xmm1 31923; SSE3-NEXT: psrlw $1, %xmm1 31924; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31925; SSE3-NEXT: psubb %xmm1, %xmm0 31926; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 31927; SSE3-NEXT: movdqa %xmm0, %xmm2 31928; SSE3-NEXT: pand %xmm1, %xmm2 31929; SSE3-NEXT: psrlw $2, %xmm0 31930; SSE3-NEXT: pand %xmm1, %xmm0 31931; SSE3-NEXT: paddb %xmm2, %xmm0 31932; SSE3-NEXT: movdqa %xmm0, %xmm1 31933; SSE3-NEXT: psrlw $4, 
%xmm1 31934; SSE3-NEXT: paddb %xmm0, %xmm1 31935; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 31936; SSE3-NEXT: pxor %xmm0, %xmm0 31937; SSE3-NEXT: psadbw %xmm1, %xmm0 31938; SSE3-NEXT: por {{.*}}(%rip), %xmm0 31939; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 31940; SSE3-NEXT: movdqa %xmm1, %xmm2 31941; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 31942; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31943; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 31944; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31945; SSE3-NEXT: pand %xmm3, %xmm1 31946; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31947; SSE3-NEXT: por %xmm1, %xmm0 31948; SSE3-NEXT: retq 31949; 31950; SSSE3-LABEL: ult_42_v2i64: 31951; SSSE3: # %bb.0: 31952; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31953; SSSE3-NEXT: movdqa %xmm0, %xmm2 31954; SSSE3-NEXT: pand %xmm1, %xmm2 31955; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31956; SSSE3-NEXT: movdqa %xmm3, %xmm4 31957; SSSE3-NEXT: pshufb %xmm2, %xmm4 31958; SSSE3-NEXT: psrlw $4, %xmm0 31959; SSSE3-NEXT: pand %xmm1, %xmm0 31960; SSSE3-NEXT: pshufb %xmm0, %xmm3 31961; SSSE3-NEXT: paddb %xmm4, %xmm3 31962; SSSE3-NEXT: pxor %xmm0, %xmm0 31963; SSSE3-NEXT: psadbw %xmm3, %xmm0 31964; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 31965; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 31966; SSSE3-NEXT: movdqa %xmm1, %xmm2 31967; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 31968; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31969; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 31970; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31971; SSSE3-NEXT: pand %xmm3, %xmm1 31972; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31973; SSSE3-NEXT: por %xmm1, %xmm0 31974; SSSE3-NEXT: retq 31975; 31976; SSE41-LABEL: ult_42_v2i64: 31977; SSE41: # %bb.0: 31978; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 31979; SSE41-NEXT: movdqa %xmm0, %xmm2 31980; SSE41-NEXT: pand %xmm1, %xmm2 31981; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 31982; SSE41-NEXT: movdqa %xmm3, %xmm4 31983; SSE41-NEXT: pshufb %xmm2, %xmm4 31984; SSE41-NEXT: psrlw $4, %xmm0 31985; SSE41-NEXT: pand %xmm1, %xmm0 31986; SSE41-NEXT: pshufb %xmm0, %xmm3 31987; SSE41-NEXT: paddb %xmm4, %xmm3 31988; SSE41-NEXT: pxor %xmm0, %xmm0 31989; SSE41-NEXT: psadbw %xmm3, %xmm0 31990; SSE41-NEXT: por {{.*}}(%rip), %xmm0 31991; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 31992; SSE41-NEXT: movdqa %xmm1, %xmm2 31993; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 31994; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 31995; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 31996; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 31997; SSE41-NEXT: pand %xmm3, %xmm1 31998; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 31999; SSE41-NEXT: por %xmm1, %xmm0 32000; SSE41-NEXT: retq 32001; 32002; AVX1-LABEL: ult_42_v2i64: 32003; AVX1: # %bb.0: 32004; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32005; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 32006; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32007; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32008; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 32009; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 32010; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32011; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32012; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 32013; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32014; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] 32015; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32016; AVX1-NEXT: retq 32017; 32018; AVX2-LABEL: ult_42_v2i64: 32019; AVX2: # %bb.0: 32020; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32021; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 32022; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32023; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32024; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 32025; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 32026; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 32027; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32028; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 32029; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32030; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] 32031; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32032; AVX2-NEXT: retq 32033; 32034; AVX512VPOPCNTDQ-LABEL: ult_42_v2i64: 32035; AVX512VPOPCNTDQ: # %bb.0: 32036; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32037; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 32038; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] 32039; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32040; AVX512VPOPCNTDQ-NEXT: vzeroupper 32041; AVX512VPOPCNTDQ-NEXT: retq 32042; 32043; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64: 32044; AVX512VPOPCNTDQVL: # %bb.0: 32045; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 32046; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 32047; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32048; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32049; AVX512VPOPCNTDQVL-NEXT: retq 32050; 32051; BITALG_NOVLX-LABEL: ult_42_v2i64: 32052; BITALG_NOVLX: # %bb.0: 32053; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32054; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 32055; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 32056; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32057; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] 32058; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32059; BITALG_NOVLX-NEXT: vzeroupper 32060; BITALG_NOVLX-NEXT: retq 32061; 32062; BITALG-LABEL: ult_42_v2i64: 32063; BITALG: # %bb.0: 32064; BITALG-NEXT: vpopcntb %xmm0, %xmm0 32065; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 32066; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32067; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 32068; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32069; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32070; BITALG-NEXT: retq 32071 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 32072 %3 = icmp ult <2 x i64> %2, <i64 
42, i64 42> 32073 %4 = sext <2 x i1> %3 to <2 x i64> 32074 ret <2 x i64> %4 32075} 32076 32077define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { 32078; SSE2-LABEL: ugt_42_v2i64: 32079; SSE2: # %bb.0: 32080; SSE2-NEXT: movdqa %xmm0, %xmm1 32081; SSE2-NEXT: psrlw $1, %xmm1 32082; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32083; SSE2-NEXT: psubb %xmm1, %xmm0 32084; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32085; SSE2-NEXT: movdqa %xmm0, %xmm2 32086; SSE2-NEXT: pand %xmm1, %xmm2 32087; SSE2-NEXT: psrlw $2, %xmm0 32088; SSE2-NEXT: pand %xmm1, %xmm0 32089; SSE2-NEXT: paddb %xmm2, %xmm0 32090; SSE2-NEXT: movdqa %xmm0, %xmm1 32091; SSE2-NEXT: psrlw $4, %xmm1 32092; SSE2-NEXT: paddb %xmm0, %xmm1 32093; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32094; SSE2-NEXT: pxor %xmm0, %xmm0 32095; SSE2-NEXT: psadbw %xmm1, %xmm0 32096; SSE2-NEXT: por {{.*}}(%rip), %xmm0 32097; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 32098; SSE2-NEXT: movdqa %xmm0, %xmm2 32099; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 32100; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32101; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 32102; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32103; SSE2-NEXT: pand %xmm3, %xmm1 32104; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32105; SSE2-NEXT: por %xmm1, %xmm0 32106; SSE2-NEXT: retq 32107; 32108; SSE3-LABEL: ugt_42_v2i64: 32109; SSE3: # %bb.0: 32110; SSE3-NEXT: movdqa %xmm0, %xmm1 32111; SSE3-NEXT: psrlw $1, %xmm1 32112; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32113; SSE3-NEXT: psubb %xmm1, %xmm0 32114; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32115; SSE3-NEXT: movdqa %xmm0, %xmm2 32116; SSE3-NEXT: pand %xmm1, %xmm2 32117; SSE3-NEXT: psrlw $2, %xmm0 32118; SSE3-NEXT: pand %xmm1, %xmm0 32119; SSE3-NEXT: paddb %xmm2, %xmm0 32120; SSE3-NEXT: movdqa %xmm0, %xmm1 32121; SSE3-NEXT: psrlw $4, %xmm1 32122; SSE3-NEXT: paddb %xmm0, %xmm1 32123; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32124; SSE3-NEXT: pxor %xmm0, 
%xmm0 32125; SSE3-NEXT: psadbw %xmm1, %xmm0 32126; SSE3-NEXT: por {{.*}}(%rip), %xmm0 32127; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 32128; SSE3-NEXT: movdqa %xmm0, %xmm2 32129; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 32130; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32131; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 32132; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32133; SSE3-NEXT: pand %xmm3, %xmm1 32134; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32135; SSE3-NEXT: por %xmm1, %xmm0 32136; SSE3-NEXT: retq 32137; 32138; SSSE3-LABEL: ugt_42_v2i64: 32139; SSSE3: # %bb.0: 32140; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32141; SSSE3-NEXT: movdqa %xmm0, %xmm2 32142; SSSE3-NEXT: pand %xmm1, %xmm2 32143; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32144; SSSE3-NEXT: movdqa %xmm3, %xmm4 32145; SSSE3-NEXT: pshufb %xmm2, %xmm4 32146; SSSE3-NEXT: psrlw $4, %xmm0 32147; SSSE3-NEXT: pand %xmm1, %xmm0 32148; SSSE3-NEXT: pshufb %xmm0, %xmm3 32149; SSSE3-NEXT: paddb %xmm4, %xmm3 32150; SSSE3-NEXT: pxor %xmm0, %xmm0 32151; SSSE3-NEXT: psadbw %xmm3, %xmm0 32152; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 32153; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 32154; SSSE3-NEXT: movdqa %xmm0, %xmm2 32155; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 32156; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32157; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 32158; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32159; SSSE3-NEXT: pand %xmm3, %xmm1 32160; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32161; SSSE3-NEXT: por %xmm1, %xmm0 32162; SSSE3-NEXT: retq 32163; 32164; SSE41-LABEL: ugt_42_v2i64: 32165; SSE41: # %bb.0: 32166; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32167; SSE41-NEXT: movdqa %xmm0, %xmm2 32168; SSE41-NEXT: pand %xmm1, %xmm2 32169; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32170; SSE41-NEXT: movdqa %xmm3, %xmm4 32171; SSE41-NEXT: 
pshufb %xmm2, %xmm4 32172; SSE41-NEXT: psrlw $4, %xmm0 32173; SSE41-NEXT: pand %xmm1, %xmm0 32174; SSE41-NEXT: pshufb %xmm0, %xmm3 32175; SSE41-NEXT: paddb %xmm4, %xmm3 32176; SSE41-NEXT: pxor %xmm0, %xmm0 32177; SSE41-NEXT: psadbw %xmm3, %xmm0 32178; SSE41-NEXT: por {{.*}}(%rip), %xmm0 32179; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] 32180; SSE41-NEXT: movdqa %xmm0, %xmm2 32181; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 32182; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32183; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 32184; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32185; SSE41-NEXT: pand %xmm3, %xmm1 32186; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32187; SSE41-NEXT: por %xmm1, %xmm0 32188; SSE41-NEXT: retq 32189; 32190; AVX1-LABEL: ugt_42_v2i64: 32191; AVX1: # %bb.0: 32192; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32193; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 32194; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32195; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32196; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 32197; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 32198; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32199; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32200; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 32201; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32202; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32203; AVX1-NEXT: retq 32204; 32205; AVX2-LABEL: ugt_42_v2i64: 32206; AVX2: # %bb.0: 32207; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32208; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 32209; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32210; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32211; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 32212; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 32213; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32214; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32215; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 32216; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32217; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32218; AVX2-NEXT: retq 32219; 32220; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64: 32221; AVX512VPOPCNTDQ: # %bb.0: 32222; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32223; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 32224; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32225; AVX512VPOPCNTDQ-NEXT: vzeroupper 32226; AVX512VPOPCNTDQ-NEXT: retq 32227; 32228; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64: 32229; AVX512VPOPCNTDQVL: # %bb.0: 32230; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 32231; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 32232; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32233; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32234; AVX512VPOPCNTDQVL-NEXT: retq 32235; 32236; BITALG_NOVLX-LABEL: ugt_42_v2i64: 32237; BITALG_NOVLX: # %bb.0: 32238; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32239; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 32240; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 32241; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32242; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32243; BITALG_NOVLX-NEXT: vzeroupper 32244; BITALG_NOVLX-NEXT: retq 32245; 32246; BITALG-LABEL: ugt_42_v2i64: 32247; BITALG: # %bb.0: 32248; BITALG-NEXT: vpopcntb %xmm0, %xmm0 32249; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 32250; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32251; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 32252; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32253; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32254; BITALG-NEXT: retq 32255 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 32256 %3 = icmp ugt <2 x i64> %2, <i64 42, i64 42> 32257 %4 = sext <2 x i1> %3 to <2 x i64> 32258 ret <2 x i64> %4 32259} 32260 32261define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { 32262; SSE2-LABEL: ult_43_v2i64: 32263; SSE2: # %bb.0: 32264; SSE2-NEXT: movdqa %xmm0, %xmm1 32265; SSE2-NEXT: psrlw $1, %xmm1 32266; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 32267; SSE2-NEXT: psubb %xmm1, %xmm0 32268; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32269; SSE2-NEXT: movdqa %xmm0, %xmm2 32270; SSE2-NEXT: pand %xmm1, %xmm2 32271; SSE2-NEXT: psrlw $2, %xmm0 32272; SSE2-NEXT: pand %xmm1, %xmm0 32273; SSE2-NEXT: paddb %xmm2, %xmm0 32274; SSE2-NEXT: movdqa %xmm0, %xmm1 32275; SSE2-NEXT: psrlw $4, %xmm1 32276; SSE2-NEXT: paddb %xmm0, %xmm1 32277; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32278; SSE2-NEXT: pxor %xmm0, %xmm0 32279; SSE2-NEXT: psadbw %xmm1, %xmm0 32280; SSE2-NEXT: por {{.*}}(%rip), %xmm0 32281; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32282; SSE2-NEXT: movdqa %xmm1, %xmm2 32283; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 32284; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32285; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 32286; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32287; SSE2-NEXT: pand %xmm3, %xmm1 32288; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32289; SSE2-NEXT: por %xmm1, %xmm0 32290; SSE2-NEXT: retq 32291; 32292; SSE3-LABEL: ult_43_v2i64: 32293; SSE3: # %bb.0: 32294; SSE3-NEXT: movdqa %xmm0, %xmm1 32295; SSE3-NEXT: psrlw $1, %xmm1 32296; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32297; SSE3-NEXT: psubb %xmm1, %xmm0 32298; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32299; SSE3-NEXT: movdqa %xmm0, %xmm2 32300; SSE3-NEXT: pand %xmm1, %xmm2 32301; SSE3-NEXT: psrlw $2, %xmm0 32302; SSE3-NEXT: pand %xmm1, %xmm0 32303; SSE3-NEXT: paddb %xmm2, %xmm0 32304; SSE3-NEXT: movdqa %xmm0, %xmm1 32305; SSE3-NEXT: psrlw $4, %xmm1 32306; SSE3-NEXT: paddb %xmm0, %xmm1 32307; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32308; SSE3-NEXT: pxor %xmm0, %xmm0 32309; SSE3-NEXT: psadbw %xmm1, %xmm0 32310; SSE3-NEXT: por {{.*}}(%rip), %xmm0 32311; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32312; SSE3-NEXT: movdqa %xmm1, %xmm2 32313; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 32314; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32315; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 32316; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32317; SSE3-NEXT: pand %xmm3, %xmm1 32318; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32319; SSE3-NEXT: por %xmm1, %xmm0 32320; SSE3-NEXT: retq 32321; 32322; SSSE3-LABEL: ult_43_v2i64: 32323; SSSE3: # %bb.0: 32324; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32325; SSSE3-NEXT: movdqa %xmm0, %xmm2 32326; SSSE3-NEXT: pand %xmm1, %xmm2 32327; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32328; SSSE3-NEXT: movdqa %xmm3, %xmm4 32329; SSSE3-NEXT: pshufb %xmm2, %xmm4 32330; SSSE3-NEXT: psrlw $4, %xmm0 32331; SSSE3-NEXT: pand %xmm1, %xmm0 32332; SSSE3-NEXT: pshufb %xmm0, %xmm3 32333; SSSE3-NEXT: paddb %xmm4, %xmm3 32334; SSSE3-NEXT: pxor %xmm0, %xmm0 32335; SSSE3-NEXT: psadbw %xmm3, %xmm0 32336; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 32337; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32338; SSSE3-NEXT: movdqa %xmm1, %xmm2 32339; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 32340; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32341; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 32342; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32343; SSSE3-NEXT: pand %xmm3, %xmm1 32344; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32345; SSSE3-NEXT: por %xmm1, %xmm0 32346; SSSE3-NEXT: retq 32347; 32348; SSE41-LABEL: ult_43_v2i64: 32349; SSE41: # %bb.0: 32350; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32351; SSE41-NEXT: movdqa %xmm0, %xmm2 32352; SSE41-NEXT: pand %xmm1, %xmm2 32353; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32354; SSE41-NEXT: movdqa %xmm3, %xmm4 32355; SSE41-NEXT: pshufb %xmm2, %xmm4 32356; SSE41-NEXT: psrlw $4, %xmm0 32357; SSE41-NEXT: pand %xmm1, %xmm0 32358; SSE41-NEXT: pshufb %xmm0, %xmm3 32359; SSE41-NEXT: paddb %xmm4, %xmm3 32360; SSE41-NEXT: pxor %xmm0, %xmm0 32361; SSE41-NEXT: psadbw %xmm3, %xmm0 32362; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
32363; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32364; SSE41-NEXT: movdqa %xmm1, %xmm2 32365; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 32366; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32367; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 32368; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32369; SSE41-NEXT: pand %xmm3, %xmm1 32370; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32371; SSE41-NEXT: por %xmm1, %xmm0 32372; SSE41-NEXT: retq 32373; 32374; AVX1-LABEL: ult_43_v2i64: 32375; AVX1: # %bb.0: 32376; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32377; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 32378; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32379; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32380; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 32381; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 32382; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32383; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32384; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 32385; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32386; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] 32387; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32388; AVX1-NEXT: retq 32389; 32390; AVX2-LABEL: ult_43_v2i64: 32391; AVX2: # %bb.0: 32392; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32393; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 32394; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32395; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32396; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 32397; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 32398; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32399; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32400; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 32401; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32402; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] 32403; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32404; AVX2-NEXT: retq 32405; 32406; AVX512VPOPCNTDQ-LABEL: ult_43_v2i64: 32407; AVX512VPOPCNTDQ: # %bb.0: 32408; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 32409; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 32410; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] 32411; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32412; AVX512VPOPCNTDQ-NEXT: vzeroupper 32413; AVX512VPOPCNTDQ-NEXT: retq 32414; 32415; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64: 32416; AVX512VPOPCNTDQVL: # %bb.0: 32417; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 32418; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 32419; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32420; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32421; AVX512VPOPCNTDQVL-NEXT: retq 32422; 32423; BITALG_NOVLX-LABEL: ult_43_v2i64: 32424; BITALG_NOVLX: # %bb.0: 32425; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32426; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 32427; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 32428; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32429; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] 32430; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32431; BITALG_NOVLX-NEXT: vzeroupper 32432; BITALG_NOVLX-NEXT: retq 32433; 32434; BITALG-LABEL: ult_43_v2i64: 32435; BITALG: # %bb.0: 32436; BITALG-NEXT: vpopcntb %xmm0, %xmm0 32437; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 32438; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32439; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 32440; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32441; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32442; BITALG-NEXT: retq 32443 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 32444 %3 = icmp ult <2 x i64> %2, <i64 43, i64 43> 32445 %4 = sext <2 x i1> %3 to <2 x i64> 32446 ret <2 x i64> %4 32447} 32448 32449define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { 32450; SSE2-LABEL: ugt_43_v2i64: 32451; SSE2: # %bb.0: 32452; SSE2-NEXT: movdqa %xmm0, %xmm1 32453; SSE2-NEXT: psrlw $1, %xmm1 32454; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32455; SSE2-NEXT: psubb %xmm1, %xmm0 32456; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32457; SSE2-NEXT: movdqa %xmm0, %xmm2 32458; SSE2-NEXT: pand %xmm1, %xmm2 32459; SSE2-NEXT: psrlw $2, %xmm0 32460; SSE2-NEXT: pand %xmm1, %xmm0 32461; SSE2-NEXT: paddb %xmm2, %xmm0 32462; SSE2-NEXT: movdqa %xmm0, %xmm1 32463; SSE2-NEXT: psrlw $4, %xmm1 32464; SSE2-NEXT: paddb %xmm0, %xmm1 32465; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32466; SSE2-NEXT: pxor %xmm0, %xmm0 32467; SSE2-NEXT: psadbw %xmm1, %xmm0 32468; SSE2-NEXT: por {{.*}}(%rip), %xmm0 32469; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32470; SSE2-NEXT: movdqa %xmm0, %xmm2 32471; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 32472; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32473; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 32474; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32475; SSE2-NEXT: pand %xmm3, %xmm1 32476; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32477; SSE2-NEXT: por %xmm1, %xmm0 32478; SSE2-NEXT: retq 32479; 32480; SSE3-LABEL: ugt_43_v2i64: 32481; SSE3: # %bb.0: 32482; SSE3-NEXT: movdqa %xmm0, %xmm1 32483; SSE3-NEXT: psrlw $1, %xmm1 32484; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32485; SSE3-NEXT: psubb %xmm1, %xmm0 32486; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32487; SSE3-NEXT: movdqa %xmm0, %xmm2 32488; SSE3-NEXT: pand %xmm1, %xmm2 32489; SSE3-NEXT: psrlw $2, %xmm0 32490; SSE3-NEXT: pand %xmm1, %xmm0 32491; SSE3-NEXT: paddb %xmm2, %xmm0 32492; SSE3-NEXT: movdqa %xmm0, %xmm1 32493; SSE3-NEXT: psrlw $4, %xmm1 32494; SSE3-NEXT: paddb %xmm0, %xmm1 32495; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32496; SSE3-NEXT: pxor %xmm0, %xmm0 32497; SSE3-NEXT: psadbw %xmm1, %xmm0 32498; SSE3-NEXT: por {{.*}}(%rip), %xmm0 32499; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32500; SSE3-NEXT: movdqa %xmm0, %xmm2 32501; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 32502; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32503; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 32504; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32505; 
SSE3-NEXT: pand %xmm3, %xmm1 32506; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32507; SSE3-NEXT: por %xmm1, %xmm0 32508; SSE3-NEXT: retq 32509; 32510; SSSE3-LABEL: ugt_43_v2i64: 32511; SSSE3: # %bb.0: 32512; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32513; SSSE3-NEXT: movdqa %xmm0, %xmm2 32514; SSSE3-NEXT: pand %xmm1, %xmm2 32515; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32516; SSSE3-NEXT: movdqa %xmm3, %xmm4 32517; SSSE3-NEXT: pshufb %xmm2, %xmm4 32518; SSSE3-NEXT: psrlw $4, %xmm0 32519; SSSE3-NEXT: pand %xmm1, %xmm0 32520; SSSE3-NEXT: pshufb %xmm0, %xmm3 32521; SSSE3-NEXT: paddb %xmm4, %xmm3 32522; SSSE3-NEXT: pxor %xmm0, %xmm0 32523; SSSE3-NEXT: psadbw %xmm3, %xmm0 32524; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 32525; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32526; SSSE3-NEXT: movdqa %xmm0, %xmm2 32527; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 32528; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32529; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 32530; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32531; SSSE3-NEXT: pand %xmm3, %xmm1 32532; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32533; SSSE3-NEXT: por %xmm1, %xmm0 32534; SSSE3-NEXT: retq 32535; 32536; SSE41-LABEL: ugt_43_v2i64: 32537; SSE41: # %bb.0: 32538; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32539; SSE41-NEXT: movdqa %xmm0, %xmm2 32540; SSE41-NEXT: pand %xmm1, %xmm2 32541; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32542; SSE41-NEXT: movdqa %xmm3, %xmm4 32543; SSE41-NEXT: pshufb %xmm2, %xmm4 32544; SSE41-NEXT: psrlw $4, %xmm0 32545; SSE41-NEXT: pand %xmm1, %xmm0 32546; SSE41-NEXT: pshufb %xmm0, %xmm3 32547; SSE41-NEXT: paddb %xmm4, %xmm3 32548; SSE41-NEXT: pxor %xmm0, %xmm0 32549; SSE41-NEXT: psadbw %xmm3, %xmm0 32550; SSE41-NEXT: por {{.*}}(%rip), %xmm0 32551; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] 32552; SSE41-NEXT: movdqa %xmm0, 
%xmm2 32553; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 32554; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32555; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 32556; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32557; SSE41-NEXT: pand %xmm3, %xmm1 32558; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32559; SSE41-NEXT: por %xmm1, %xmm0 32560; SSE41-NEXT: retq 32561; 32562; AVX1-LABEL: ugt_43_v2i64: 32563; AVX1: # %bb.0: 32564; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32565; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 32566; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32567; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32568; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 32569; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 32570; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32571; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32572; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 32573; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32574; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32575; AVX1-NEXT: retq 32576; 32577; AVX2-LABEL: ugt_43_v2i64: 32578; AVX2: # %bb.0: 32579; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32580; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 32581; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32582; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32583; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 32584; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 32585; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32586; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32587; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 32588; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32589; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32590; AVX2-NEXT: retq 32591; 32592; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64: 32593; AVX512VPOPCNTDQ: # %bb.0: 32594; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32595; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 32596; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32597; AVX512VPOPCNTDQ-NEXT: vzeroupper 32598; 
AVX512VPOPCNTDQ-NEXT: retq 32599; 32600; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64: 32601; AVX512VPOPCNTDQVL: # %bb.0: 32602; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 32603; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 32604; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32605; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32606; AVX512VPOPCNTDQVL-NEXT: retq 32607; 32608; BITALG_NOVLX-LABEL: ugt_43_v2i64: 32609; BITALG_NOVLX: # %bb.0: 32610; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32611; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 32612; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 32613; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32614; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32615; BITALG_NOVLX-NEXT: vzeroupper 32616; BITALG_NOVLX-NEXT: retq 32617; 32618; BITALG-LABEL: ugt_43_v2i64: 32619; BITALG: # %bb.0: 32620; BITALG-NEXT: vpopcntb %xmm0, %xmm0 32621; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 32622; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32623; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 32624; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32625; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32626; BITALG-NEXT: retq 32627 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 32628 %3 = icmp ugt <2 x i64> %2, <i64 43, i64 43> 32629 %4 = sext <2 x i1> %3 to <2 x i64> 32630 ret <2 x i64> %4 32631} 32632 32633define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { 32634; SSE2-LABEL: ult_44_v2i64: 32635; SSE2: # %bb.0: 32636; SSE2-NEXT: movdqa %xmm0, %xmm1 32637; SSE2-NEXT: psrlw $1, %xmm1 32638; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32639; SSE2-NEXT: psubb %xmm1, %xmm0 32640; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32641; SSE2-NEXT: movdqa %xmm0, %xmm2 32642; SSE2-NEXT: pand %xmm1, %xmm2 32643; SSE2-NEXT: psrlw $2, %xmm0 32644; SSE2-NEXT: pand %xmm1, %xmm0 32645; SSE2-NEXT: paddb %xmm2, %xmm0 32646; SSE2-NEXT: movdqa %xmm0, %xmm1 32647; SSE2-NEXT: psrlw $4, 
%xmm1 32648; SSE2-NEXT: paddb %xmm0, %xmm1 32649; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32650; SSE2-NEXT: pxor %xmm0, %xmm0 32651; SSE2-NEXT: psadbw %xmm1, %xmm0 32652; SSE2-NEXT: por {{.*}}(%rip), %xmm0 32653; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32654; SSE2-NEXT: movdqa %xmm1, %xmm2 32655; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 32656; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32657; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 32658; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32659; SSE2-NEXT: pand %xmm3, %xmm1 32660; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32661; SSE2-NEXT: por %xmm1, %xmm0 32662; SSE2-NEXT: retq 32663; 32664; SSE3-LABEL: ult_44_v2i64: 32665; SSE3: # %bb.0: 32666; SSE3-NEXT: movdqa %xmm0, %xmm1 32667; SSE3-NEXT: psrlw $1, %xmm1 32668; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32669; SSE3-NEXT: psubb %xmm1, %xmm0 32670; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32671; SSE3-NEXT: movdqa %xmm0, %xmm2 32672; SSE3-NEXT: pand %xmm1, %xmm2 32673; SSE3-NEXT: psrlw $2, %xmm0 32674; SSE3-NEXT: pand %xmm1, %xmm0 32675; SSE3-NEXT: paddb %xmm2, %xmm0 32676; SSE3-NEXT: movdqa %xmm0, %xmm1 32677; SSE3-NEXT: psrlw $4, %xmm1 32678; SSE3-NEXT: paddb %xmm0, %xmm1 32679; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32680; SSE3-NEXT: pxor %xmm0, %xmm0 32681; SSE3-NEXT: psadbw %xmm1, %xmm0 32682; SSE3-NEXT: por {{.*}}(%rip), %xmm0 32683; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32684; SSE3-NEXT: movdqa %xmm1, %xmm2 32685; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 32686; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32687; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 32688; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32689; SSE3-NEXT: pand %xmm3, %xmm1 32690; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32691; SSE3-NEXT: por %xmm1, %xmm0 32692; SSE3-NEXT: retq 32693; 32694; SSSE3-LABEL: ult_44_v2i64: 32695; SSSE3: # %bb.0: 32696; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32697; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 32698; SSSE3-NEXT: pand %xmm1, %xmm2 32699; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32700; SSSE3-NEXT: movdqa %xmm3, %xmm4 32701; SSSE3-NEXT: pshufb %xmm2, %xmm4 32702; SSSE3-NEXT: psrlw $4, %xmm0 32703; SSSE3-NEXT: pand %xmm1, %xmm0 32704; SSSE3-NEXT: pshufb %xmm0, %xmm3 32705; SSSE3-NEXT: paddb %xmm4, %xmm3 32706; SSSE3-NEXT: pxor %xmm0, %xmm0 32707; SSSE3-NEXT: psadbw %xmm3, %xmm0 32708; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 32709; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32710; SSSE3-NEXT: movdqa %xmm1, %xmm2 32711; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 32712; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32713; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 32714; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32715; SSSE3-NEXT: pand %xmm3, %xmm1 32716; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32717; SSSE3-NEXT: por %xmm1, %xmm0 32718; SSSE3-NEXT: retq 32719; 32720; SSE41-LABEL: ult_44_v2i64: 32721; SSE41: # %bb.0: 32722; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32723; SSE41-NEXT: movdqa %xmm0, %xmm2 32724; SSE41-NEXT: pand %xmm1, %xmm2 32725; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32726; SSE41-NEXT: movdqa %xmm3, %xmm4 32727; SSE41-NEXT: pshufb %xmm2, %xmm4 32728; SSE41-NEXT: psrlw $4, %xmm0 32729; SSE41-NEXT: pand %xmm1, %xmm0 32730; SSE41-NEXT: pshufb %xmm0, %xmm3 32731; SSE41-NEXT: paddb %xmm4, %xmm3 32732; SSE41-NEXT: pxor %xmm0, %xmm0 32733; SSE41-NEXT: psadbw %xmm3, %xmm0 32734; SSE41-NEXT: por {{.*}}(%rip), %xmm0 32735; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32736; SSE41-NEXT: movdqa %xmm1, %xmm2 32737; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 32738; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32739; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 32740; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32741; SSE41-NEXT: pand %xmm3, %xmm1 32742; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32743; 
SSE41-NEXT: por %xmm1, %xmm0 32744; SSE41-NEXT: retq 32745; 32746; AVX1-LABEL: ult_44_v2i64: 32747; AVX1: # %bb.0: 32748; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32749; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 32750; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32751; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32752; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 32753; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 32754; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32755; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32756; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 32757; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32758; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] 32759; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32760; AVX1-NEXT: retq 32761; 32762; AVX2-LABEL: ult_44_v2i64: 32763; AVX2: # %bb.0: 32764; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32765; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 32766; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32767; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32768; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 32769; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 32770; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32771; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32772; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 32773; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32774; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] 32775; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32776; AVX2-NEXT: retq 32777; 32778; AVX512VPOPCNTDQ-LABEL: ult_44_v2i64: 32779; AVX512VPOPCNTDQ: # %bb.0: 32780; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32781; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 32782; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] 32783; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32784; AVX512VPOPCNTDQ-NEXT: vzeroupper 32785; AVX512VPOPCNTDQ-NEXT: retq 32786; 32787; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64: 32788; AVX512VPOPCNTDQVL: # %bb.0: 32789; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 32790; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 32791; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32792; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32793; AVX512VPOPCNTDQVL-NEXT: retq 32794; 32795; BITALG_NOVLX-LABEL: ult_44_v2i64: 32796; BITALG_NOVLX: # %bb.0: 32797; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32798; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 32799; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 32800; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32801; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] 32802; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 32803; BITALG_NOVLX-NEXT: vzeroupper 32804; BITALG_NOVLX-NEXT: retq 32805; 32806; BITALG-LABEL: ult_44_v2i64: 32807; BITALG: # %bb.0: 32808; BITALG-NEXT: vpopcntb %xmm0, %xmm0 32809; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 32810; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32811; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 32812; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32813; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32814; BITALG-NEXT: retq 32815 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 32816 %3 = icmp ult <2 x i64> %2, <i64 44, i64 44> 32817 %4 = sext <2 x i1> %3 to <2 x i64> 32818 ret <2 x i64> %4 32819} 32820 32821define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { 32822; SSE2-LABEL: ugt_44_v2i64: 32823; SSE2: # %bb.0: 32824; SSE2-NEXT: movdqa %xmm0, %xmm1 32825; SSE2-NEXT: psrlw $1, %xmm1 32826; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32827; SSE2-NEXT: psubb %xmm1, %xmm0 32828; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32829; SSE2-NEXT: movdqa %xmm0, %xmm2 32830; SSE2-NEXT: pand %xmm1, %xmm2 32831; SSE2-NEXT: psrlw $2, %xmm0 32832; SSE2-NEXT: pand %xmm1, %xmm0 32833; SSE2-NEXT: paddb %xmm2, %xmm0 32834; SSE2-NEXT: movdqa %xmm0, %xmm1 32835; SSE2-NEXT: psrlw $4, %xmm1 32836; SSE2-NEXT: paddb %xmm0, %xmm1 32837; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 32838; SSE2-NEXT: pxor %xmm0, 
%xmm0 32839; SSE2-NEXT: psadbw %xmm1, %xmm0 32840; SSE2-NEXT: por {{.*}}(%rip), %xmm0 32841; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32842; SSE2-NEXT: movdqa %xmm0, %xmm2 32843; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 32844; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32845; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 32846; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32847; SSE2-NEXT: pand %xmm3, %xmm1 32848; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32849; SSE2-NEXT: por %xmm1, %xmm0 32850; SSE2-NEXT: retq 32851; 32852; SSE3-LABEL: ugt_44_v2i64: 32853; SSE3: # %bb.0: 32854; SSE3-NEXT: movdqa %xmm0, %xmm1 32855; SSE3-NEXT: psrlw $1, %xmm1 32856; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32857; SSE3-NEXT: psubb %xmm1, %xmm0 32858; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 32859; SSE3-NEXT: movdqa %xmm0, %xmm2 32860; SSE3-NEXT: pand %xmm1, %xmm2 32861; SSE3-NEXT: psrlw $2, %xmm0 32862; SSE3-NEXT: pand %xmm1, %xmm0 32863; SSE3-NEXT: paddb %xmm2, %xmm0 32864; SSE3-NEXT: movdqa %xmm0, %xmm1 32865; SSE3-NEXT: psrlw $4, %xmm1 32866; SSE3-NEXT: paddb %xmm0, %xmm1 32867; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 32868; SSE3-NEXT: pxor %xmm0, %xmm0 32869; SSE3-NEXT: psadbw %xmm1, %xmm0 32870; SSE3-NEXT: por {{.*}}(%rip), %xmm0 32871; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32872; SSE3-NEXT: movdqa %xmm0, %xmm2 32873; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 32874; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32875; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 32876; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32877; SSE3-NEXT: pand %xmm3, %xmm1 32878; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32879; SSE3-NEXT: por %xmm1, %xmm0 32880; SSE3-NEXT: retq 32881; 32882; SSSE3-LABEL: ugt_44_v2i64: 32883; SSSE3: # %bb.0: 32884; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32885; SSSE3-NEXT: movdqa %xmm0, %xmm2 32886; SSSE3-NEXT: pand %xmm1, %xmm2 32887; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32888; SSSE3-NEXT: movdqa %xmm3, %xmm4 32889; SSSE3-NEXT: pshufb %xmm2, %xmm4 32890; SSSE3-NEXT: psrlw $4, %xmm0 32891; SSSE3-NEXT: pand %xmm1, %xmm0 32892; SSSE3-NEXT: pshufb %xmm0, %xmm3 32893; SSSE3-NEXT: paddb %xmm4, %xmm3 32894; SSSE3-NEXT: pxor %xmm0, %xmm0 32895; SSSE3-NEXT: psadbw %xmm3, %xmm0 32896; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 32897; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32898; SSSE3-NEXT: movdqa %xmm0, %xmm2 32899; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 32900; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32901; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 32902; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32903; SSSE3-NEXT: pand %xmm3, %xmm1 32904; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32905; SSSE3-NEXT: por %xmm1, %xmm0 32906; SSSE3-NEXT: retq 32907; 32908; SSE41-LABEL: ugt_44_v2i64: 32909; SSE41: # %bb.0: 32910; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32911; SSE41-NEXT: movdqa %xmm0, %xmm2 32912; SSE41-NEXT: pand %xmm1, %xmm2 32913; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32914; SSE41-NEXT: movdqa %xmm3, %xmm4 32915; SSE41-NEXT: pshufb %xmm2, %xmm4 32916; SSE41-NEXT: psrlw $4, %xmm0 32917; SSE41-NEXT: pand %xmm1, %xmm0 32918; SSE41-NEXT: pshufb %xmm0, %xmm3 32919; SSE41-NEXT: paddb %xmm4, %xmm3 32920; SSE41-NEXT: pxor %xmm0, %xmm0 32921; SSE41-NEXT: psadbw %xmm3, %xmm0 32922; SSE41-NEXT: por {{.*}}(%rip), %xmm0 32923; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] 32924; SSE41-NEXT: movdqa %xmm0, %xmm2 32925; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 32926; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 32927; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 32928; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 32929; SSE41-NEXT: pand %xmm3, %xmm1 32930; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 32931; SSE41-NEXT: por %xmm1, %xmm0 32932; SSE41-NEXT: retq 32933; 32934; AVX1-LABEL: ugt_44_v2i64: 32935; AVX1: # %bb.0: 
32936; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32937; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 32938; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32939; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32940; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 32941; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 32942; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32943; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32944; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 32945; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32946; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32947; AVX1-NEXT: retq 32948; 32949; AVX2-LABEL: ugt_44_v2i64: 32950; AVX2: # %bb.0: 32951; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 32952; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 32953; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 32954; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 32955; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 32956; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 32957; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 32958; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 32959; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 32960; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32961; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32962; AVX2-NEXT: retq 32963; 32964; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64: 32965; AVX512VPOPCNTDQ: # %bb.0: 32966; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32967; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 32968; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32969; AVX512VPOPCNTDQ-NEXT: vzeroupper 32970; AVX512VPOPCNTDQ-NEXT: retq 32971; 32972; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64: 32973; AVX512VPOPCNTDQVL: # %bb.0: 32974; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 32975; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 32976; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32977; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32978; AVX512VPOPCNTDQVL-NEXT: retq 32979; 32980; 
BITALG_NOVLX-LABEL: ugt_44_v2i64: 32981; BITALG_NOVLX: # %bb.0: 32982; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 32983; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 32984; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 32985; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32986; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 32987; BITALG_NOVLX-NEXT: vzeroupper 32988; BITALG_NOVLX-NEXT: retq 32989; 32990; BITALG-LABEL: ugt_44_v2i64: 32991; BITALG: # %bb.0: 32992; BITALG-NEXT: vpopcntb %xmm0, %xmm0 32993; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 32994; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 32995; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 32996; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 32997; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 32998; BITALG-NEXT: retq 32999 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 33000 %3 = icmp ugt <2 x i64> %2, <i64 44, i64 44> 33001 %4 = sext <2 x i1> %3 to <2 x i64> 33002 ret <2 x i64> %4 33003} 33004 33005define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { 33006; SSE2-LABEL: ult_45_v2i64: 33007; SSE2: # %bb.0: 33008; SSE2-NEXT: movdqa %xmm0, %xmm1 33009; SSE2-NEXT: psrlw $1, %xmm1 33010; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33011; SSE2-NEXT: psubb %xmm1, %xmm0 33012; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33013; SSE2-NEXT: movdqa %xmm0, %xmm2 33014; SSE2-NEXT: pand %xmm1, %xmm2 33015; SSE2-NEXT: psrlw $2, %xmm0 33016; SSE2-NEXT: pand %xmm1, %xmm0 33017; SSE2-NEXT: paddb %xmm2, %xmm0 33018; SSE2-NEXT: movdqa %xmm0, %xmm1 33019; SSE2-NEXT: psrlw $4, %xmm1 33020; SSE2-NEXT: paddb %xmm0, %xmm1 33021; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33022; SSE2-NEXT: pxor %xmm0, %xmm0 33023; SSE2-NEXT: psadbw %xmm1, %xmm0 33024; SSE2-NEXT: por {{.*}}(%rip), %xmm0 33025; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] 33026; SSE2-NEXT: movdqa %xmm1, %xmm2 33027; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 33028; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33029; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 33030; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33031; SSE2-NEXT: pand %xmm3, %xmm1 33032; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33033; SSE2-NEXT: por %xmm1, %xmm0 33034; SSE2-NEXT: retq 33035; 33036; SSE3-LABEL: ult_45_v2i64: 33037; SSE3: # %bb.0: 33038; SSE3-NEXT: movdqa %xmm0, %xmm1 33039; SSE3-NEXT: psrlw $1, %xmm1 33040; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33041; SSE3-NEXT: psubb %xmm1, %xmm0 33042; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33043; SSE3-NEXT: movdqa %xmm0, %xmm2 33044; SSE3-NEXT: pand %xmm1, %xmm2 33045; SSE3-NEXT: psrlw $2, %xmm0 33046; SSE3-NEXT: pand %xmm1, %xmm0 33047; SSE3-NEXT: paddb %xmm2, %xmm0 33048; SSE3-NEXT: movdqa %xmm0, %xmm1 33049; SSE3-NEXT: psrlw $4, %xmm1 33050; SSE3-NEXT: paddb %xmm0, %xmm1 33051; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33052; SSE3-NEXT: pxor %xmm0, %xmm0 33053; SSE3-NEXT: psadbw %xmm1, %xmm0 33054; SSE3-NEXT: por {{.*}}(%rip), %xmm0 33055; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] 33056; SSE3-NEXT: movdqa %xmm1, %xmm2 33057; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 33058; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33059; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 33060; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33061; SSE3-NEXT: pand %xmm3, %xmm1 33062; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33063; SSE3-NEXT: por %xmm1, %xmm0 33064; SSE3-NEXT: retq 33065; 33066; SSSE3-LABEL: ult_45_v2i64: 33067; SSSE3: # %bb.0: 33068; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33069; SSSE3-NEXT: movdqa %xmm0, %xmm2 33070; SSSE3-NEXT: pand %xmm1, %xmm2 33071; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33072; SSSE3-NEXT: movdqa %xmm3, %xmm4 33073; SSSE3-NEXT: pshufb %xmm2, %xmm4 33074; SSSE3-NEXT: psrlw $4, %xmm0 33075; SSSE3-NEXT: pand %xmm1, %xmm0 33076; SSSE3-NEXT: pshufb %xmm0, %xmm3 33077; SSSE3-NEXT: paddb %xmm4, %xmm3 33078; SSSE3-NEXT: pxor %xmm0, 
%xmm0 33079; SSSE3-NEXT: psadbw %xmm3, %xmm0 33080; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 33081; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] 33082; SSSE3-NEXT: movdqa %xmm1, %xmm2 33083; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 33084; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33085; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 33086; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33087; SSSE3-NEXT: pand %xmm3, %xmm1 33088; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33089; SSSE3-NEXT: por %xmm1, %xmm0 33090; SSSE3-NEXT: retq 33091; 33092; SSE41-LABEL: ult_45_v2i64: 33093; SSE41: # %bb.0: 33094; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33095; SSE41-NEXT: movdqa %xmm0, %xmm2 33096; SSE41-NEXT: pand %xmm1, %xmm2 33097; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33098; SSE41-NEXT: movdqa %xmm3, %xmm4 33099; SSE41-NEXT: pshufb %xmm2, %xmm4 33100; SSE41-NEXT: psrlw $4, %xmm0 33101; SSE41-NEXT: pand %xmm1, %xmm0 33102; SSE41-NEXT: pshufb %xmm0, %xmm3 33103; SSE41-NEXT: paddb %xmm4, %xmm3 33104; SSE41-NEXT: pxor %xmm0, %xmm0 33105; SSE41-NEXT: psadbw %xmm3, %xmm0 33106; SSE41-NEXT: por {{.*}}(%rip), %xmm0 33107; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] 33108; SSE41-NEXT: movdqa %xmm1, %xmm2 33109; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 33110; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33111; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 33112; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33113; SSE41-NEXT: pand %xmm3, %xmm1 33114; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33115; SSE41-NEXT: por %xmm1, %xmm0 33116; SSE41-NEXT: retq 33117; 33118; AVX1-LABEL: ult_45_v2i64: 33119; AVX1: # %bb.0: 33120; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33121; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 33122; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33123; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33124; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 33125; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 33126; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33127; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33128; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 33129; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33130; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] 33131; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33132; AVX1-NEXT: retq 33133; 33134; AVX2-LABEL: ult_45_v2i64: 33135; AVX2: # %bb.0: 33136; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33137; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 33138; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33139; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33140; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 33141; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 33142; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33143; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33144; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 33145; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33146; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] 33147; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33148; AVX2-NEXT: retq 33149; 33150; AVX512VPOPCNTDQ-LABEL: ult_45_v2i64: 33151; AVX512VPOPCNTDQ: # %bb.0: 33152; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33153; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 33154; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] 33155; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33156; AVX512VPOPCNTDQ-NEXT: vzeroupper 33157; AVX512VPOPCNTDQ-NEXT: retq 33158; 33159; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64: 33160; AVX512VPOPCNTDQVL: # %bb.0: 33161; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 33162; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 33163; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33164; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33165; AVX512VPOPCNTDQVL-NEXT: retq 33166; 33167; BITALG_NOVLX-LABEL: ult_45_v2i64: 33168; BITALG_NOVLX: # %bb.0: 33169; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33170; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 33171; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 33172; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33173; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] 33174; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33175; BITALG_NOVLX-NEXT: vzeroupper 33176; BITALG_NOVLX-NEXT: retq 33177; 33178; BITALG-LABEL: ult_45_v2i64: 33179; BITALG: # %bb.0: 33180; BITALG-NEXT: vpopcntb %xmm0, %xmm0 33181; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 33182; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33183; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 33184; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33185; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33186; BITALG-NEXT: retq 33187 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 33188 %3 = icmp ult <2 x i64> %2, <i64 45, i64 45> 33189 %4 = sext <2 x i1> %3 to <2 x i64> 33190 ret <2 x i64> %4 33191} 33192 33193define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { 33194; SSE2-LABEL: ugt_45_v2i64: 33195; SSE2: # %bb.0: 33196; SSE2-NEXT: movdqa %xmm0, %xmm1 33197; SSE2-NEXT: psrlw $1, %xmm1 33198; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33199; SSE2-NEXT: psubb %xmm1, %xmm0 33200; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33201; SSE2-NEXT: movdqa %xmm0, %xmm2 33202; SSE2-NEXT: pand %xmm1, %xmm2 33203; SSE2-NEXT: psrlw $2, %xmm0 33204; SSE2-NEXT: pand %xmm1, %xmm0 33205; SSE2-NEXT: paddb %xmm2, %xmm0 33206; SSE2-NEXT: movdqa %xmm0, %xmm1 33207; SSE2-NEXT: psrlw $4, %xmm1 33208; SSE2-NEXT: paddb %xmm0, %xmm1 33209; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33210; SSE2-NEXT: pxor %xmm0, %xmm0 33211; SSE2-NEXT: psadbw %xmm1, %xmm0 33212; SSE2-NEXT: por {{.*}}(%rip), %xmm0 33213; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] 33214; SSE2-NEXT: movdqa %xmm0, %xmm2 33215; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 33216; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33217; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 33218; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33219; SSE2-NEXT: pand 
%xmm3, %xmm1 33220; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33221; SSE2-NEXT: por %xmm1, %xmm0 33222; SSE2-NEXT: retq 33223; 33224; SSE3-LABEL: ugt_45_v2i64: 33225; SSE3: # %bb.0: 33226; SSE3-NEXT: movdqa %xmm0, %xmm1 33227; SSE3-NEXT: psrlw $1, %xmm1 33228; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33229; SSE3-NEXT: psubb %xmm1, %xmm0 33230; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33231; SSE3-NEXT: movdqa %xmm0, %xmm2 33232; SSE3-NEXT: pand %xmm1, %xmm2 33233; SSE3-NEXT: psrlw $2, %xmm0 33234; SSE3-NEXT: pand %xmm1, %xmm0 33235; SSE3-NEXT: paddb %xmm2, %xmm0 33236; SSE3-NEXT: movdqa %xmm0, %xmm1 33237; SSE3-NEXT: psrlw $4, %xmm1 33238; SSE3-NEXT: paddb %xmm0, %xmm1 33239; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33240; SSE3-NEXT: pxor %xmm0, %xmm0 33241; SSE3-NEXT: psadbw %xmm1, %xmm0 33242; SSE3-NEXT: por {{.*}}(%rip), %xmm0 33243; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] 33244; SSE3-NEXT: movdqa %xmm0, %xmm2 33245; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 33246; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33247; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 33248; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33249; SSE3-NEXT: pand %xmm3, %xmm1 33250; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33251; SSE3-NEXT: por %xmm1, %xmm0 33252; SSE3-NEXT: retq 33253; 33254; SSSE3-LABEL: ugt_45_v2i64: 33255; SSSE3: # %bb.0: 33256; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33257; SSSE3-NEXT: movdqa %xmm0, %xmm2 33258; SSSE3-NEXT: pand %xmm1, %xmm2 33259; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33260; SSSE3-NEXT: movdqa %xmm3, %xmm4 33261; SSSE3-NEXT: pshufb %xmm2, %xmm4 33262; SSSE3-NEXT: psrlw $4, %xmm0 33263; SSSE3-NEXT: pand %xmm1, %xmm0 33264; SSSE3-NEXT: pshufb %xmm0, %xmm3 33265; SSSE3-NEXT: paddb %xmm4, %xmm3 33266; SSSE3-NEXT: pxor %xmm0, %xmm0 33267; SSSE3-NEXT: psadbw %xmm3, %xmm0 33268; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 33269; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483693,2147483693] 33270; SSSE3-NEXT: movdqa %xmm0, %xmm2 33271; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 33272; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33273; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 33274; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33275; SSSE3-NEXT: pand %xmm3, %xmm1 33276; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33277; SSSE3-NEXT: por %xmm1, %xmm0 33278; SSSE3-NEXT: retq 33279; 33280; SSE41-LABEL: ugt_45_v2i64: 33281; SSE41: # %bb.0: 33282; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33283; SSE41-NEXT: movdqa %xmm0, %xmm2 33284; SSE41-NEXT: pand %xmm1, %xmm2 33285; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33286; SSE41-NEXT: movdqa %xmm3, %xmm4 33287; SSE41-NEXT: pshufb %xmm2, %xmm4 33288; SSE41-NEXT: psrlw $4, %xmm0 33289; SSE41-NEXT: pand %xmm1, %xmm0 33290; SSE41-NEXT: pshufb %xmm0, %xmm3 33291; SSE41-NEXT: paddb %xmm4, %xmm3 33292; SSE41-NEXT: pxor %xmm0, %xmm0 33293; SSE41-NEXT: psadbw %xmm3, %xmm0 33294; SSE41-NEXT: por {{.*}}(%rip), %xmm0 33295; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] 33296; SSE41-NEXT: movdqa %xmm0, %xmm2 33297; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 33298; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33299; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 33300; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33301; SSE41-NEXT: pand %xmm3, %xmm1 33302; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33303; SSE41-NEXT: por %xmm1, %xmm0 33304; SSE41-NEXT: retq 33305; 33306; AVX1-LABEL: ugt_45_v2i64: 33307; AVX1: # %bb.0: 33308; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33309; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 33310; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33311; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33312; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 33313; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 33314; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33315; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 33316; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 33317; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33318; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33319; AVX1-NEXT: retq 33320; 33321; AVX2-LABEL: ugt_45_v2i64: 33322; AVX2: # %bb.0: 33323; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33324; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 33325; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33326; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33327; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 33328; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 33329; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33330; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33331; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 33332; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33333; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33334; AVX2-NEXT: retq 33335; 33336; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64: 33337; AVX512VPOPCNTDQ: # %bb.0: 33338; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33339; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 33340; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33341; AVX512VPOPCNTDQ-NEXT: vzeroupper 33342; AVX512VPOPCNTDQ-NEXT: retq 33343; 33344; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64: 33345; AVX512VPOPCNTDQVL: # %bb.0: 33346; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 33347; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 33348; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33349; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33350; AVX512VPOPCNTDQVL-NEXT: retq 33351; 33352; BITALG_NOVLX-LABEL: ugt_45_v2i64: 33353; BITALG_NOVLX: # %bb.0: 33354; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33355; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 33356; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 33357; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33358; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33359; BITALG_NOVLX-NEXT: vzeroupper 33360; BITALG_NOVLX-NEXT: retq 
33361; 33362; BITALG-LABEL: ugt_45_v2i64: 33363; BITALG: # %bb.0: 33364; BITALG-NEXT: vpopcntb %xmm0, %xmm0 33365; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 33366; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33367; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 33368; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33369; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33370; BITALG-NEXT: retq 33371 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 33372 %3 = icmp ugt <2 x i64> %2, <i64 45, i64 45> 33373 %4 = sext <2 x i1> %3 to <2 x i64> 33374 ret <2 x i64> %4 33375} 33376 33377define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { 33378; SSE2-LABEL: ult_46_v2i64: 33379; SSE2: # %bb.0: 33380; SSE2-NEXT: movdqa %xmm0, %xmm1 33381; SSE2-NEXT: psrlw $1, %xmm1 33382; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33383; SSE2-NEXT: psubb %xmm1, %xmm0 33384; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33385; SSE2-NEXT: movdqa %xmm0, %xmm2 33386; SSE2-NEXT: pand %xmm1, %xmm2 33387; SSE2-NEXT: psrlw $2, %xmm0 33388; SSE2-NEXT: pand %xmm1, %xmm0 33389; SSE2-NEXT: paddb %xmm2, %xmm0 33390; SSE2-NEXT: movdqa %xmm0, %xmm1 33391; SSE2-NEXT: psrlw $4, %xmm1 33392; SSE2-NEXT: paddb %xmm0, %xmm1 33393; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33394; SSE2-NEXT: pxor %xmm0, %xmm0 33395; SSE2-NEXT: psadbw %xmm1, %xmm0 33396; SSE2-NEXT: por {{.*}}(%rip), %xmm0 33397; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33398; SSE2-NEXT: movdqa %xmm1, %xmm2 33399; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 33400; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33401; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 33402; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33403; SSE2-NEXT: pand %xmm3, %xmm1 33404; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33405; SSE2-NEXT: por %xmm1, %xmm0 33406; SSE2-NEXT: retq 33407; 33408; SSE3-LABEL: ult_46_v2i64: 33409; SSE3: # %bb.0: 33410; SSE3-NEXT: movdqa %xmm0, %xmm1 33411; SSE3-NEXT: psrlw $1, %xmm1 33412; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33413; 
SSE3-NEXT: psubb %xmm1, %xmm0 33414; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33415; SSE3-NEXT: movdqa %xmm0, %xmm2 33416; SSE3-NEXT: pand %xmm1, %xmm2 33417; SSE3-NEXT: psrlw $2, %xmm0 33418; SSE3-NEXT: pand %xmm1, %xmm0 33419; SSE3-NEXT: paddb %xmm2, %xmm0 33420; SSE3-NEXT: movdqa %xmm0, %xmm1 33421; SSE3-NEXT: psrlw $4, %xmm1 33422; SSE3-NEXT: paddb %xmm0, %xmm1 33423; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33424; SSE3-NEXT: pxor %xmm0, %xmm0 33425; SSE3-NEXT: psadbw %xmm1, %xmm0 33426; SSE3-NEXT: por {{.*}}(%rip), %xmm0 33427; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33428; SSE3-NEXT: movdqa %xmm1, %xmm2 33429; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 33430; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33431; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 33432; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33433; SSE3-NEXT: pand %xmm3, %xmm1 33434; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33435; SSE3-NEXT: por %xmm1, %xmm0 33436; SSE3-NEXT: retq 33437; 33438; SSSE3-LABEL: ult_46_v2i64: 33439; SSSE3: # %bb.0: 33440; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33441; SSSE3-NEXT: movdqa %xmm0, %xmm2 33442; SSSE3-NEXT: pand %xmm1, %xmm2 33443; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33444; SSSE3-NEXT: movdqa %xmm3, %xmm4 33445; SSSE3-NEXT: pshufb %xmm2, %xmm4 33446; SSSE3-NEXT: psrlw $4, %xmm0 33447; SSSE3-NEXT: pand %xmm1, %xmm0 33448; SSSE3-NEXT: pshufb %xmm0, %xmm3 33449; SSSE3-NEXT: paddb %xmm4, %xmm3 33450; SSSE3-NEXT: pxor %xmm0, %xmm0 33451; SSSE3-NEXT: psadbw %xmm3, %xmm0 33452; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 33453; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33454; SSSE3-NEXT: movdqa %xmm1, %xmm2 33455; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 33456; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33457; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 33458; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33459; SSSE3-NEXT: pand %xmm3, %xmm1 
33460; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33461; SSSE3-NEXT: por %xmm1, %xmm0 33462; SSSE3-NEXT: retq 33463; 33464; SSE41-LABEL: ult_46_v2i64: 33465; SSE41: # %bb.0: 33466; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33467; SSE41-NEXT: movdqa %xmm0, %xmm2 33468; SSE41-NEXT: pand %xmm1, %xmm2 33469; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33470; SSE41-NEXT: movdqa %xmm3, %xmm4 33471; SSE41-NEXT: pshufb %xmm2, %xmm4 33472; SSE41-NEXT: psrlw $4, %xmm0 33473; SSE41-NEXT: pand %xmm1, %xmm0 33474; SSE41-NEXT: pshufb %xmm0, %xmm3 33475; SSE41-NEXT: paddb %xmm4, %xmm3 33476; SSE41-NEXT: pxor %xmm0, %xmm0 33477; SSE41-NEXT: psadbw %xmm3, %xmm0 33478; SSE41-NEXT: por {{.*}}(%rip), %xmm0 33479; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33480; SSE41-NEXT: movdqa %xmm1, %xmm2 33481; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 33482; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33483; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 33484; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33485; SSE41-NEXT: pand %xmm3, %xmm1 33486; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33487; SSE41-NEXT: por %xmm1, %xmm0 33488; SSE41-NEXT: retq 33489; 33490; AVX1-LABEL: ult_46_v2i64: 33491; AVX1: # %bb.0: 33492; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33493; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 33494; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33495; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33496; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 33497; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 33498; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33499; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33500; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 33501; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33502; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] 33503; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33504; AVX1-NEXT: retq 33505; 33506; AVX2-LABEL: ult_46_v2i64: 33507; AVX2: # %bb.0: 33508; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33509; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 33510; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33511; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33512; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 33513; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 33514; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33515; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33516; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 33517; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33518; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] 33519; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33520; AVX2-NEXT: retq 33521; 33522; AVX512VPOPCNTDQ-LABEL: ult_46_v2i64: 33523; AVX512VPOPCNTDQ: # %bb.0: 33524; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33525; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 33526; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] 33527; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33528; AVX512VPOPCNTDQ-NEXT: vzeroupper 33529; AVX512VPOPCNTDQ-NEXT: retq 33530; 33531; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64: 33532; AVX512VPOPCNTDQVL: # %bb.0: 33533; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 33534; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 33535; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33536; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33537; AVX512VPOPCNTDQVL-NEXT: retq 33538; 33539; BITALG_NOVLX-LABEL: ult_46_v2i64: 33540; BITALG_NOVLX: # %bb.0: 33541; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33542; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 33543; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 33544; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33545; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] 33546; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33547; BITALG_NOVLX-NEXT: vzeroupper 33548; BITALG_NOVLX-NEXT: retq 33549; 33550; BITALG-LABEL: ult_46_v2i64: 33551; BITALG: # %bb.0: 33552; BITALG-NEXT: vpopcntb %xmm0, %xmm0 33553; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 33554; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33555; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 33556; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33557; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33558; BITALG-NEXT: retq 33559 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 33560 %3 = icmp ult <2 x i64> %2, <i64 46, i64 46> 33561 %4 = sext <2 x i1> %3 to <2 x i64> 33562 ret <2 x i64> %4 33563} 33564 33565define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { 33566; SSE2-LABEL: ugt_46_v2i64: 33567; SSE2: # %bb.0: 33568; SSE2-NEXT: movdqa %xmm0, %xmm1 33569; SSE2-NEXT: psrlw $1, %xmm1 33570; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33571; SSE2-NEXT: psubb %xmm1, %xmm0 33572; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33573; SSE2-NEXT: movdqa %xmm0, %xmm2 33574; SSE2-NEXT: pand %xmm1, %xmm2 33575; SSE2-NEXT: psrlw $2, %xmm0 33576; SSE2-NEXT: pand %xmm1, %xmm0 33577; SSE2-NEXT: paddb %xmm2, %xmm0 33578; SSE2-NEXT: movdqa %xmm0, %xmm1 33579; SSE2-NEXT: psrlw $4, %xmm1 33580; SSE2-NEXT: paddb %xmm0, %xmm1 33581; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33582; SSE2-NEXT: pxor %xmm0, %xmm0 33583; SSE2-NEXT: psadbw %xmm1, %xmm0 33584; SSE2-NEXT: por {{.*}}(%rip), %xmm0 33585; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33586; SSE2-NEXT: movdqa %xmm0, %xmm2 33587; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 33588; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33589; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 33590; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33591; SSE2-NEXT: pand %xmm3, %xmm1 33592; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33593; SSE2-NEXT: por %xmm1, %xmm0 33594; SSE2-NEXT: retq 33595; 33596; SSE3-LABEL: ugt_46_v2i64: 33597; SSE3: # %bb.0: 33598; SSE3-NEXT: movdqa %xmm0, %xmm1 33599; SSE3-NEXT: psrlw $1, %xmm1 33600; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33601; SSE3-NEXT: psubb %xmm1, %xmm0 33602; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33603; SSE3-NEXT: movdqa %xmm0, %xmm2 33604; SSE3-NEXT: pand %xmm1, %xmm2 33605; SSE3-NEXT: psrlw $2, %xmm0 33606; SSE3-NEXT: pand %xmm1, %xmm0 33607; SSE3-NEXT: paddb %xmm2, %xmm0 33608; SSE3-NEXT: movdqa %xmm0, %xmm1 33609; SSE3-NEXT: psrlw $4, %xmm1 33610; SSE3-NEXT: paddb %xmm0, %xmm1 33611; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33612; SSE3-NEXT: pxor %xmm0, %xmm0 33613; SSE3-NEXT: psadbw %xmm1, %xmm0 33614; SSE3-NEXT: por {{.*}}(%rip), %xmm0 33615; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33616; SSE3-NEXT: movdqa %xmm0, %xmm2 33617; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 33618; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33619; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 33620; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33621; SSE3-NEXT: pand %xmm3, %xmm1 33622; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33623; SSE3-NEXT: por %xmm1, %xmm0 33624; SSE3-NEXT: retq 33625; 33626; SSSE3-LABEL: ugt_46_v2i64: 33627; SSSE3: # %bb.0: 33628; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33629; SSSE3-NEXT: movdqa %xmm0, %xmm2 33630; SSSE3-NEXT: pand %xmm1, %xmm2 33631; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33632; SSSE3-NEXT: movdqa %xmm3, %xmm4 33633; SSSE3-NEXT: pshufb %xmm2, %xmm4 33634; SSSE3-NEXT: psrlw $4, %xmm0 33635; SSSE3-NEXT: pand %xmm1, %xmm0 33636; SSSE3-NEXT: pshufb %xmm0, %xmm3 33637; SSSE3-NEXT: paddb %xmm4, %xmm3 33638; SSSE3-NEXT: pxor %xmm0, %xmm0 33639; SSSE3-NEXT: psadbw %xmm3, %xmm0 33640; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 33641; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33642; SSSE3-NEXT: movdqa %xmm0, %xmm2 33643; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 33644; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33645; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 33646; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33647; SSSE3-NEXT: pand %xmm3, %xmm1 33648; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33649; 
SSSE3-NEXT: por %xmm1, %xmm0 33650; SSSE3-NEXT: retq 33651; 33652; SSE41-LABEL: ugt_46_v2i64: 33653; SSE41: # %bb.0: 33654; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33655; SSE41-NEXT: movdqa %xmm0, %xmm2 33656; SSE41-NEXT: pand %xmm1, %xmm2 33657; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33658; SSE41-NEXT: movdqa %xmm3, %xmm4 33659; SSE41-NEXT: pshufb %xmm2, %xmm4 33660; SSE41-NEXT: psrlw $4, %xmm0 33661; SSE41-NEXT: pand %xmm1, %xmm0 33662; SSE41-NEXT: pshufb %xmm0, %xmm3 33663; SSE41-NEXT: paddb %xmm4, %xmm3 33664; SSE41-NEXT: pxor %xmm0, %xmm0 33665; SSE41-NEXT: psadbw %xmm3, %xmm0 33666; SSE41-NEXT: por {{.*}}(%rip), %xmm0 33667; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] 33668; SSE41-NEXT: movdqa %xmm0, %xmm2 33669; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 33670; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33671; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 33672; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33673; SSE41-NEXT: pand %xmm3, %xmm1 33674; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33675; SSE41-NEXT: por %xmm1, %xmm0 33676; SSE41-NEXT: retq 33677; 33678; AVX1-LABEL: ugt_46_v2i64: 33679; AVX1: # %bb.0: 33680; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33681; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 33682; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33683; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33684; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 33685; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 33686; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33687; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33688; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 33689; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33690; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33691; AVX1-NEXT: retq 33692; 33693; AVX2-LABEL: ugt_46_v2i64: 33694; AVX2: # %bb.0: 33695; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33696; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 33697; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33698; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33699; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 33700; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 33701; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33702; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33703; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 33704; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33705; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33706; AVX2-NEXT: retq 33707; 33708; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64: 33709; AVX512VPOPCNTDQ: # %bb.0: 33710; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33711; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 33712; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33713; AVX512VPOPCNTDQ-NEXT: vzeroupper 33714; AVX512VPOPCNTDQ-NEXT: retq 33715; 33716; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64: 33717; AVX512VPOPCNTDQVL: # %bb.0: 33718; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 33719; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 33720; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33721; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33722; AVX512VPOPCNTDQVL-NEXT: retq 33723; 33724; BITALG_NOVLX-LABEL: ugt_46_v2i64: 33725; BITALG_NOVLX: # %bb.0: 33726; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33727; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 33728; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 33729; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33730; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 33731; BITALG_NOVLX-NEXT: vzeroupper 33732; BITALG_NOVLX-NEXT: retq 33733; 33734; BITALG-LABEL: ugt_46_v2i64: 33735; BITALG: # %bb.0: 33736; BITALG-NEXT: vpopcntb %xmm0, %xmm0 33737; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 33738; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33739; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 33740; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33741; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33742; 
BITALG-NEXT: retq 33743 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 33744 %3 = icmp ugt <2 x i64> %2, <i64 46, i64 46> 33745 %4 = sext <2 x i1> %3 to <2 x i64> 33746 ret <2 x i64> %4 33747} 33748 33749define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { 33750; SSE2-LABEL: ult_47_v2i64: 33751; SSE2: # %bb.0: 33752; SSE2-NEXT: movdqa %xmm0, %xmm1 33753; SSE2-NEXT: psrlw $1, %xmm1 33754; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33755; SSE2-NEXT: psubb %xmm1, %xmm0 33756; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33757; SSE2-NEXT: movdqa %xmm0, %xmm2 33758; SSE2-NEXT: pand %xmm1, %xmm2 33759; SSE2-NEXT: psrlw $2, %xmm0 33760; SSE2-NEXT: pand %xmm1, %xmm0 33761; SSE2-NEXT: paddb %xmm2, %xmm0 33762; SSE2-NEXT: movdqa %xmm0, %xmm1 33763; SSE2-NEXT: psrlw $4, %xmm1 33764; SSE2-NEXT: paddb %xmm0, %xmm1 33765; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33766; SSE2-NEXT: pxor %xmm0, %xmm0 33767; SSE2-NEXT: psadbw %xmm1, %xmm0 33768; SSE2-NEXT: por {{.*}}(%rip), %xmm0 33769; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 33770; SSE2-NEXT: movdqa %xmm1, %xmm2 33771; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 33772; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33773; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 33774; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33775; SSE2-NEXT: pand %xmm3, %xmm1 33776; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33777; SSE2-NEXT: por %xmm1, %xmm0 33778; SSE2-NEXT: retq 33779; 33780; SSE3-LABEL: ult_47_v2i64: 33781; SSE3: # %bb.0: 33782; SSE3-NEXT: movdqa %xmm0, %xmm1 33783; SSE3-NEXT: psrlw $1, %xmm1 33784; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33785; SSE3-NEXT: psubb %xmm1, %xmm0 33786; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33787; SSE3-NEXT: movdqa %xmm0, %xmm2 33788; SSE3-NEXT: pand %xmm1, %xmm2 33789; SSE3-NEXT: psrlw $2, %xmm0 33790; SSE3-NEXT: pand %xmm1, %xmm0 33791; SSE3-NEXT: paddb %xmm2, %xmm0 33792; SSE3-NEXT: movdqa %xmm0, %xmm1 33793; SSE3-NEXT: psrlw $4, 
%xmm1 33794; SSE3-NEXT: paddb %xmm0, %xmm1 33795; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33796; SSE3-NEXT: pxor %xmm0, %xmm0 33797; SSE3-NEXT: psadbw %xmm1, %xmm0 33798; SSE3-NEXT: por {{.*}}(%rip), %xmm0 33799; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 33800; SSE3-NEXT: movdqa %xmm1, %xmm2 33801; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 33802; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33803; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 33804; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33805; SSE3-NEXT: pand %xmm3, %xmm1 33806; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33807; SSE3-NEXT: por %xmm1, %xmm0 33808; SSE3-NEXT: retq 33809; 33810; SSSE3-LABEL: ult_47_v2i64: 33811; SSSE3: # %bb.0: 33812; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33813; SSSE3-NEXT: movdqa %xmm0, %xmm2 33814; SSSE3-NEXT: pand %xmm1, %xmm2 33815; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33816; SSSE3-NEXT: movdqa %xmm3, %xmm4 33817; SSSE3-NEXT: pshufb %xmm2, %xmm4 33818; SSSE3-NEXT: psrlw $4, %xmm0 33819; SSSE3-NEXT: pand %xmm1, %xmm0 33820; SSSE3-NEXT: pshufb %xmm0, %xmm3 33821; SSSE3-NEXT: paddb %xmm4, %xmm3 33822; SSSE3-NEXT: pxor %xmm0, %xmm0 33823; SSSE3-NEXT: psadbw %xmm3, %xmm0 33824; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 33825; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 33826; SSSE3-NEXT: movdqa %xmm1, %xmm2 33827; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 33828; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33829; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 33830; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33831; SSSE3-NEXT: pand %xmm3, %xmm1 33832; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33833; SSSE3-NEXT: por %xmm1, %xmm0 33834; SSSE3-NEXT: retq 33835; 33836; SSE41-LABEL: ult_47_v2i64: 33837; SSE41: # %bb.0: 33838; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33839; SSE41-NEXT: movdqa %xmm0, %xmm2 33840; SSE41-NEXT: pand %xmm1, %xmm2 33841; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33842; SSE41-NEXT: movdqa %xmm3, %xmm4 33843; SSE41-NEXT: pshufb %xmm2, %xmm4 33844; SSE41-NEXT: psrlw $4, %xmm0 33845; SSE41-NEXT: pand %xmm1, %xmm0 33846; SSE41-NEXT: pshufb %xmm0, %xmm3 33847; SSE41-NEXT: paddb %xmm4, %xmm3 33848; SSE41-NEXT: pxor %xmm0, %xmm0 33849; SSE41-NEXT: psadbw %xmm3, %xmm0 33850; SSE41-NEXT: por {{.*}}(%rip), %xmm0 33851; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 33852; SSE41-NEXT: movdqa %xmm1, %xmm2 33853; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 33854; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33855; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 33856; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33857; SSE41-NEXT: pand %xmm3, %xmm1 33858; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33859; SSE41-NEXT: por %xmm1, %xmm0 33860; SSE41-NEXT: retq 33861; 33862; AVX1-LABEL: ult_47_v2i64: 33863; AVX1: # %bb.0: 33864; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33865; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 33866; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33867; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33868; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 33869; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 33870; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 33871; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33872; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 33873; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33874; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] 33875; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33876; AVX1-NEXT: retq 33877; 33878; AVX2-LABEL: ult_47_v2i64: 33879; AVX2: # %bb.0: 33880; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33881; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 33882; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 33883; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 33884; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 33885; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 33886; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 33887; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 33888; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 33889; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33890; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] 33891; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33892; AVX2-NEXT: retq 33893; 33894; AVX512VPOPCNTDQ-LABEL: ult_47_v2i64: 33895; AVX512VPOPCNTDQ: # %bb.0: 33896; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33897; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 33898; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] 33899; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33900; AVX512VPOPCNTDQ-NEXT: vzeroupper 33901; AVX512VPOPCNTDQ-NEXT: retq 33902; 33903; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64: 33904; AVX512VPOPCNTDQVL: # %bb.0: 33905; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 33906; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 33907; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33908; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33909; AVX512VPOPCNTDQVL-NEXT: retq 33910; 33911; BITALG_NOVLX-LABEL: ult_47_v2i64: 33912; BITALG_NOVLX: # %bb.0: 33913; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 33914; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 33915; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 33916; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33917; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] 33918; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 33919; BITALG_NOVLX-NEXT: vzeroupper 33920; BITALG_NOVLX-NEXT: retq 33921; 33922; BITALG-LABEL: ult_47_v2i64: 33923; BITALG: # %bb.0: 33924; BITALG-NEXT: vpopcntb %xmm0, %xmm0 33925; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 33926; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 33927; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 33928; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 33929; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 33930; BITALG-NEXT: retq 33931 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 33932 %3 = icmp ult <2 x i64> %2, <i64 
47, i64 47> 33933 %4 = sext <2 x i1> %3 to <2 x i64> 33934 ret <2 x i64> %4 33935} 33936 33937define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { 33938; SSE2-LABEL: ugt_47_v2i64: 33939; SSE2: # %bb.0: 33940; SSE2-NEXT: movdqa %xmm0, %xmm1 33941; SSE2-NEXT: psrlw $1, %xmm1 33942; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33943; SSE2-NEXT: psubb %xmm1, %xmm0 33944; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33945; SSE2-NEXT: movdqa %xmm0, %xmm2 33946; SSE2-NEXT: pand %xmm1, %xmm2 33947; SSE2-NEXT: psrlw $2, %xmm0 33948; SSE2-NEXT: pand %xmm1, %xmm0 33949; SSE2-NEXT: paddb %xmm2, %xmm0 33950; SSE2-NEXT: movdqa %xmm0, %xmm1 33951; SSE2-NEXT: psrlw $4, %xmm1 33952; SSE2-NEXT: paddb %xmm0, %xmm1 33953; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 33954; SSE2-NEXT: pxor %xmm0, %xmm0 33955; SSE2-NEXT: psadbw %xmm1, %xmm0 33956; SSE2-NEXT: por {{.*}}(%rip), %xmm0 33957; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 33958; SSE2-NEXT: movdqa %xmm0, %xmm2 33959; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 33960; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33961; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 33962; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33963; SSE2-NEXT: pand %xmm3, %xmm1 33964; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33965; SSE2-NEXT: por %xmm1, %xmm0 33966; SSE2-NEXT: retq 33967; 33968; SSE3-LABEL: ugt_47_v2i64: 33969; SSE3: # %bb.0: 33970; SSE3-NEXT: movdqa %xmm0, %xmm1 33971; SSE3-NEXT: psrlw $1, %xmm1 33972; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33973; SSE3-NEXT: psubb %xmm1, %xmm0 33974; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 33975; SSE3-NEXT: movdqa %xmm0, %xmm2 33976; SSE3-NEXT: pand %xmm1, %xmm2 33977; SSE3-NEXT: psrlw $2, %xmm0 33978; SSE3-NEXT: pand %xmm1, %xmm0 33979; SSE3-NEXT: paddb %xmm2, %xmm0 33980; SSE3-NEXT: movdqa %xmm0, %xmm1 33981; SSE3-NEXT: psrlw $4, %xmm1 33982; SSE3-NEXT: paddb %xmm0, %xmm1 33983; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 33984; SSE3-NEXT: pxor %xmm0, 
%xmm0 33985; SSE3-NEXT: psadbw %xmm1, %xmm0 33986; SSE3-NEXT: por {{.*}}(%rip), %xmm0 33987; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 33988; SSE3-NEXT: movdqa %xmm0, %xmm2 33989; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 33990; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 33991; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 33992; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 33993; SSE3-NEXT: pand %xmm3, %xmm1 33994; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 33995; SSE3-NEXT: por %xmm1, %xmm0 33996; SSE3-NEXT: retq 33997; 33998; SSSE3-LABEL: ugt_47_v2i64: 33999; SSSE3: # %bb.0: 34000; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34001; SSSE3-NEXT: movdqa %xmm0, %xmm2 34002; SSSE3-NEXT: pand %xmm1, %xmm2 34003; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34004; SSSE3-NEXT: movdqa %xmm3, %xmm4 34005; SSSE3-NEXT: pshufb %xmm2, %xmm4 34006; SSSE3-NEXT: psrlw $4, %xmm0 34007; SSSE3-NEXT: pand %xmm1, %xmm0 34008; SSSE3-NEXT: pshufb %xmm0, %xmm3 34009; SSSE3-NEXT: paddb %xmm4, %xmm3 34010; SSSE3-NEXT: pxor %xmm0, %xmm0 34011; SSSE3-NEXT: psadbw %xmm3, %xmm0 34012; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 34013; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 34014; SSSE3-NEXT: movdqa %xmm0, %xmm2 34015; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 34016; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34017; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 34018; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34019; SSSE3-NEXT: pand %xmm3, %xmm1 34020; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34021; SSSE3-NEXT: por %xmm1, %xmm0 34022; SSSE3-NEXT: retq 34023; 34024; SSE41-LABEL: ugt_47_v2i64: 34025; SSE41: # %bb.0: 34026; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34027; SSE41-NEXT: movdqa %xmm0, %xmm2 34028; SSE41-NEXT: pand %xmm1, %xmm2 34029; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34030; SSE41-NEXT: movdqa %xmm3, %xmm4 34031; SSE41-NEXT: 
pshufb %xmm2, %xmm4 34032; SSE41-NEXT: psrlw $4, %xmm0 34033; SSE41-NEXT: pand %xmm1, %xmm0 34034; SSE41-NEXT: pshufb %xmm0, %xmm3 34035; SSE41-NEXT: paddb %xmm4, %xmm3 34036; SSE41-NEXT: pxor %xmm0, %xmm0 34037; SSE41-NEXT: psadbw %xmm3, %xmm0 34038; SSE41-NEXT: por {{.*}}(%rip), %xmm0 34039; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] 34040; SSE41-NEXT: movdqa %xmm0, %xmm2 34041; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 34042; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34043; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 34044; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34045; SSE41-NEXT: pand %xmm3, %xmm1 34046; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34047; SSE41-NEXT: por %xmm1, %xmm0 34048; SSE41-NEXT: retq 34049; 34050; AVX1-LABEL: ugt_47_v2i64: 34051; AVX1: # %bb.0: 34052; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34053; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 34054; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34055; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34056; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 34057; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 34058; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34059; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34060; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 34061; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34062; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34063; AVX1-NEXT: retq 34064; 34065; AVX2-LABEL: ugt_47_v2i64: 34066; AVX2: # %bb.0: 34067; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34068; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 34069; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34070; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34071; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 34072; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 34073; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34074; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34075; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 34076; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34077; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34078; AVX2-NEXT: retq 34079; 34080; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64: 34081; AVX512VPOPCNTDQ: # %bb.0: 34082; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34083; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 34084; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34085; AVX512VPOPCNTDQ-NEXT: vzeroupper 34086; AVX512VPOPCNTDQ-NEXT: retq 34087; 34088; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64: 34089; AVX512VPOPCNTDQVL: # %bb.0: 34090; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 34091; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 34092; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34093; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34094; AVX512VPOPCNTDQVL-NEXT: retq 34095; 34096; BITALG_NOVLX-LABEL: ugt_47_v2i64: 34097; BITALG_NOVLX: # %bb.0: 34098; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34099; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 34100; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 34101; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34102; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34103; BITALG_NOVLX-NEXT: vzeroupper 34104; BITALG_NOVLX-NEXT: retq 34105; 34106; BITALG-LABEL: ugt_47_v2i64: 34107; BITALG: # %bb.0: 34108; BITALG-NEXT: vpopcntb %xmm0, %xmm0 34109; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 34110; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34111; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 34112; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34113; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34114; BITALG-NEXT: retq 34115 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 34116 %3 = icmp ugt <2 x i64> %2, <i64 47, i64 47> 34117 %4 = sext <2 x i1> %3 to <2 x i64> 34118 ret <2 x i64> %4 34119} 34120 34121define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { 34122; SSE2-LABEL: ult_48_v2i64: 34123; SSE2: # %bb.0: 34124; SSE2-NEXT: movdqa %xmm0, %xmm1 34125; SSE2-NEXT: psrlw $1, %xmm1 34126; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 34127; SSE2-NEXT: psubb %xmm1, %xmm0 34128; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34129; SSE2-NEXT: movdqa %xmm0, %xmm2 34130; SSE2-NEXT: pand %xmm1, %xmm2 34131; SSE2-NEXT: psrlw $2, %xmm0 34132; SSE2-NEXT: pand %xmm1, %xmm0 34133; SSE2-NEXT: paddb %xmm2, %xmm0 34134; SSE2-NEXT: movdqa %xmm0, %xmm1 34135; SSE2-NEXT: psrlw $4, %xmm1 34136; SSE2-NEXT: paddb %xmm0, %xmm1 34137; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34138; SSE2-NEXT: pxor %xmm0, %xmm0 34139; SSE2-NEXT: psadbw %xmm1, %xmm0 34140; SSE2-NEXT: por {{.*}}(%rip), %xmm0 34141; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34142; SSE2-NEXT: movdqa %xmm1, %xmm2 34143; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 34144; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34145; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 34146; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34147; SSE2-NEXT: pand %xmm3, %xmm1 34148; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34149; SSE2-NEXT: por %xmm1, %xmm0 34150; SSE2-NEXT: retq 34151; 34152; SSE3-LABEL: ult_48_v2i64: 34153; SSE3: # %bb.0: 34154; SSE3-NEXT: movdqa %xmm0, %xmm1 34155; SSE3-NEXT: psrlw $1, %xmm1 34156; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34157; SSE3-NEXT: psubb %xmm1, %xmm0 34158; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34159; SSE3-NEXT: movdqa %xmm0, %xmm2 34160; SSE3-NEXT: pand %xmm1, %xmm2 34161; SSE3-NEXT: psrlw $2, %xmm0 34162; SSE3-NEXT: pand %xmm1, %xmm0 34163; SSE3-NEXT: paddb %xmm2, %xmm0 34164; SSE3-NEXT: movdqa %xmm0, %xmm1 34165; SSE3-NEXT: psrlw $4, %xmm1 34166; SSE3-NEXT: paddb %xmm0, %xmm1 34167; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34168; SSE3-NEXT: pxor %xmm0, %xmm0 34169; SSE3-NEXT: psadbw %xmm1, %xmm0 34170; SSE3-NEXT: por {{.*}}(%rip), %xmm0 34171; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34172; SSE3-NEXT: movdqa %xmm1, %xmm2 34173; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 34174; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34175; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 34176; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34177; SSE3-NEXT: pand %xmm3, %xmm1 34178; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34179; SSE3-NEXT: por %xmm1, %xmm0 34180; SSE3-NEXT: retq 34181; 34182; SSSE3-LABEL: ult_48_v2i64: 34183; SSSE3: # %bb.0: 34184; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34185; SSSE3-NEXT: movdqa %xmm0, %xmm2 34186; SSSE3-NEXT: pand %xmm1, %xmm2 34187; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34188; SSSE3-NEXT: movdqa %xmm3, %xmm4 34189; SSSE3-NEXT: pshufb %xmm2, %xmm4 34190; SSSE3-NEXT: psrlw $4, %xmm0 34191; SSSE3-NEXT: pand %xmm1, %xmm0 34192; SSSE3-NEXT: pshufb %xmm0, %xmm3 34193; SSSE3-NEXT: paddb %xmm4, %xmm3 34194; SSSE3-NEXT: pxor %xmm0, %xmm0 34195; SSSE3-NEXT: psadbw %xmm3, %xmm0 34196; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 34197; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34198; SSSE3-NEXT: movdqa %xmm1, %xmm2 34199; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 34200; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34201; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 34202; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34203; SSSE3-NEXT: pand %xmm3, %xmm1 34204; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34205; SSSE3-NEXT: por %xmm1, %xmm0 34206; SSSE3-NEXT: retq 34207; 34208; SSE41-LABEL: ult_48_v2i64: 34209; SSE41: # %bb.0: 34210; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34211; SSE41-NEXT: movdqa %xmm0, %xmm2 34212; SSE41-NEXT: pand %xmm1, %xmm2 34213; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34214; SSE41-NEXT: movdqa %xmm3, %xmm4 34215; SSE41-NEXT: pshufb %xmm2, %xmm4 34216; SSE41-NEXT: psrlw $4, %xmm0 34217; SSE41-NEXT: pand %xmm1, %xmm0 34218; SSE41-NEXT: pshufb %xmm0, %xmm3 34219; SSE41-NEXT: paddb %xmm4, %xmm3 34220; SSE41-NEXT: pxor %xmm0, %xmm0 34221; SSE41-NEXT: psadbw %xmm3, %xmm0 34222; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
34223; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34224; SSE41-NEXT: movdqa %xmm1, %xmm2 34225; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 34226; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34227; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 34228; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34229; SSE41-NEXT: pand %xmm3, %xmm1 34230; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34231; SSE41-NEXT: por %xmm1, %xmm0 34232; SSE41-NEXT: retq 34233; 34234; AVX1-LABEL: ult_48_v2i64: 34235; AVX1: # %bb.0: 34236; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34237; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 34238; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34239; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34240; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 34241; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 34242; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34243; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34244; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 34245; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34246; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] 34247; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34248; AVX1-NEXT: retq 34249; 34250; AVX2-LABEL: ult_48_v2i64: 34251; AVX2: # %bb.0: 34252; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34253; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 34254; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34255; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34256; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 34257; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 34258; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34259; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34260; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 34261; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34262; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] 34263; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34264; AVX2-NEXT: retq 34265; 34266; AVX512VPOPCNTDQ-LABEL: ult_48_v2i64: 34267; AVX512VPOPCNTDQ: # %bb.0: 34268; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 34269; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 34270; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] 34271; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34272; AVX512VPOPCNTDQ-NEXT: vzeroupper 34273; AVX512VPOPCNTDQ-NEXT: retq 34274; 34275; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64: 34276; AVX512VPOPCNTDQVL: # %bb.0: 34277; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 34278; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 34279; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34280; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34281; AVX512VPOPCNTDQVL-NEXT: retq 34282; 34283; BITALG_NOVLX-LABEL: ult_48_v2i64: 34284; BITALG_NOVLX: # %bb.0: 34285; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34286; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 34287; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 34288; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34289; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] 34290; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34291; BITALG_NOVLX-NEXT: vzeroupper 34292; BITALG_NOVLX-NEXT: retq 34293; 34294; BITALG-LABEL: ult_48_v2i64: 34295; BITALG: # %bb.0: 34296; BITALG-NEXT: vpopcntb %xmm0, %xmm0 34297; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 34298; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34299; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 34300; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34301; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34302; BITALG-NEXT: retq 34303 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 34304 %3 = icmp ult <2 x i64> %2, <i64 48, i64 48> 34305 %4 = sext <2 x i1> %3 to <2 x i64> 34306 ret <2 x i64> %4 34307} 34308 34309define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { 34310; SSE2-LABEL: ugt_48_v2i64: 34311; SSE2: # %bb.0: 34312; SSE2-NEXT: movdqa %xmm0, %xmm1 34313; SSE2-NEXT: psrlw $1, %xmm1 34314; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34315; SSE2-NEXT: psubb %xmm1, %xmm0 34316; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34317; SSE2-NEXT: movdqa %xmm0, %xmm2 34318; SSE2-NEXT: pand %xmm1, %xmm2 34319; SSE2-NEXT: psrlw $2, %xmm0 34320; SSE2-NEXT: pand %xmm1, %xmm0 34321; SSE2-NEXT: paddb %xmm2, %xmm0 34322; SSE2-NEXT: movdqa %xmm0, %xmm1 34323; SSE2-NEXT: psrlw $4, %xmm1 34324; SSE2-NEXT: paddb %xmm0, %xmm1 34325; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34326; SSE2-NEXT: pxor %xmm0, %xmm0 34327; SSE2-NEXT: psadbw %xmm1, %xmm0 34328; SSE2-NEXT: por {{.*}}(%rip), %xmm0 34329; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34330; SSE2-NEXT: movdqa %xmm0, %xmm2 34331; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 34332; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34333; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 34334; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34335; SSE2-NEXT: pand %xmm3, %xmm1 34336; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34337; SSE2-NEXT: por %xmm1, %xmm0 34338; SSE2-NEXT: retq 34339; 34340; SSE3-LABEL: ugt_48_v2i64: 34341; SSE3: # %bb.0: 34342; SSE3-NEXT: movdqa %xmm0, %xmm1 34343; SSE3-NEXT: psrlw $1, %xmm1 34344; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34345; SSE3-NEXT: psubb %xmm1, %xmm0 34346; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34347; SSE3-NEXT: movdqa %xmm0, %xmm2 34348; SSE3-NEXT: pand %xmm1, %xmm2 34349; SSE3-NEXT: psrlw $2, %xmm0 34350; SSE3-NEXT: pand %xmm1, %xmm0 34351; SSE3-NEXT: paddb %xmm2, %xmm0 34352; SSE3-NEXT: movdqa %xmm0, %xmm1 34353; SSE3-NEXT: psrlw $4, %xmm1 34354; SSE3-NEXT: paddb %xmm0, %xmm1 34355; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34356; SSE3-NEXT: pxor %xmm0, %xmm0 34357; SSE3-NEXT: psadbw %xmm1, %xmm0 34358; SSE3-NEXT: por {{.*}}(%rip), %xmm0 34359; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34360; SSE3-NEXT: movdqa %xmm0, %xmm2 34361; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 34362; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34363; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 34364; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34365; 
SSE3-NEXT: pand %xmm3, %xmm1 34366; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34367; SSE3-NEXT: por %xmm1, %xmm0 34368; SSE3-NEXT: retq 34369; 34370; SSSE3-LABEL: ugt_48_v2i64: 34371; SSSE3: # %bb.0: 34372; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34373; SSSE3-NEXT: movdqa %xmm0, %xmm2 34374; SSSE3-NEXT: pand %xmm1, %xmm2 34375; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34376; SSSE3-NEXT: movdqa %xmm3, %xmm4 34377; SSSE3-NEXT: pshufb %xmm2, %xmm4 34378; SSSE3-NEXT: psrlw $4, %xmm0 34379; SSSE3-NEXT: pand %xmm1, %xmm0 34380; SSSE3-NEXT: pshufb %xmm0, %xmm3 34381; SSSE3-NEXT: paddb %xmm4, %xmm3 34382; SSSE3-NEXT: pxor %xmm0, %xmm0 34383; SSSE3-NEXT: psadbw %xmm3, %xmm0 34384; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 34385; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34386; SSSE3-NEXT: movdqa %xmm0, %xmm2 34387; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 34388; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34389; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 34390; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34391; SSSE3-NEXT: pand %xmm3, %xmm1 34392; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34393; SSSE3-NEXT: por %xmm1, %xmm0 34394; SSSE3-NEXT: retq 34395; 34396; SSE41-LABEL: ugt_48_v2i64: 34397; SSE41: # %bb.0: 34398; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34399; SSE41-NEXT: movdqa %xmm0, %xmm2 34400; SSE41-NEXT: pand %xmm1, %xmm2 34401; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34402; SSE41-NEXT: movdqa %xmm3, %xmm4 34403; SSE41-NEXT: pshufb %xmm2, %xmm4 34404; SSE41-NEXT: psrlw $4, %xmm0 34405; SSE41-NEXT: pand %xmm1, %xmm0 34406; SSE41-NEXT: pshufb %xmm0, %xmm3 34407; SSE41-NEXT: paddb %xmm4, %xmm3 34408; SSE41-NEXT: pxor %xmm0, %xmm0 34409; SSE41-NEXT: psadbw %xmm3, %xmm0 34410; SSE41-NEXT: por {{.*}}(%rip), %xmm0 34411; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] 34412; SSE41-NEXT: movdqa %xmm0, 
%xmm2 34413; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 34414; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34415; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 34416; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34417; SSE41-NEXT: pand %xmm3, %xmm1 34418; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34419; SSE41-NEXT: por %xmm1, %xmm0 34420; SSE41-NEXT: retq 34421; 34422; AVX1-LABEL: ugt_48_v2i64: 34423; AVX1: # %bb.0: 34424; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34425; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 34426; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34427; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34428; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 34429; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 34430; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34431; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34432; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 34433; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34434; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34435; AVX1-NEXT: retq 34436; 34437; AVX2-LABEL: ugt_48_v2i64: 34438; AVX2: # %bb.0: 34439; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34440; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 34441; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34442; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34443; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 34444; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 34445; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34446; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34447; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 34448; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34449; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34450; AVX2-NEXT: retq 34451; 34452; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64: 34453; AVX512VPOPCNTDQ: # %bb.0: 34454; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34455; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 34456; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34457; AVX512VPOPCNTDQ-NEXT: vzeroupper 34458; 
AVX512VPOPCNTDQ-NEXT: retq 34459; 34460; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64: 34461; AVX512VPOPCNTDQVL: # %bb.0: 34462; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 34463; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 34464; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34465; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34466; AVX512VPOPCNTDQVL-NEXT: retq 34467; 34468; BITALG_NOVLX-LABEL: ugt_48_v2i64: 34469; BITALG_NOVLX: # %bb.0: 34470; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34471; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 34472; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 34473; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34474; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34475; BITALG_NOVLX-NEXT: vzeroupper 34476; BITALG_NOVLX-NEXT: retq 34477; 34478; BITALG-LABEL: ugt_48_v2i64: 34479; BITALG: # %bb.0: 34480; BITALG-NEXT: vpopcntb %xmm0, %xmm0 34481; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 34482; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34483; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 34484; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34485; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34486; BITALG-NEXT: retq 34487 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 34488 %3 = icmp ugt <2 x i64> %2, <i64 48, i64 48> 34489 %4 = sext <2 x i1> %3 to <2 x i64> 34490 ret <2 x i64> %4 34491} 34492 34493define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { 34494; SSE2-LABEL: ult_49_v2i64: 34495; SSE2: # %bb.0: 34496; SSE2-NEXT: movdqa %xmm0, %xmm1 34497; SSE2-NEXT: psrlw $1, %xmm1 34498; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34499; SSE2-NEXT: psubb %xmm1, %xmm0 34500; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34501; SSE2-NEXT: movdqa %xmm0, %xmm2 34502; SSE2-NEXT: pand %xmm1, %xmm2 34503; SSE2-NEXT: psrlw $2, %xmm0 34504; SSE2-NEXT: pand %xmm1, %xmm0 34505; SSE2-NEXT: paddb %xmm2, %xmm0 34506; SSE2-NEXT: movdqa %xmm0, %xmm1 34507; SSE2-NEXT: psrlw $4, 
%xmm1 34508; SSE2-NEXT: paddb %xmm0, %xmm1 34509; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34510; SSE2-NEXT: pxor %xmm0, %xmm0 34511; SSE2-NEXT: psadbw %xmm1, %xmm0 34512; SSE2-NEXT: por {{.*}}(%rip), %xmm0 34513; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34514; SSE2-NEXT: movdqa %xmm1, %xmm2 34515; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 34516; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34517; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 34518; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34519; SSE2-NEXT: pand %xmm3, %xmm1 34520; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34521; SSE2-NEXT: por %xmm1, %xmm0 34522; SSE2-NEXT: retq 34523; 34524; SSE3-LABEL: ult_49_v2i64: 34525; SSE3: # %bb.0: 34526; SSE3-NEXT: movdqa %xmm0, %xmm1 34527; SSE3-NEXT: psrlw $1, %xmm1 34528; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34529; SSE3-NEXT: psubb %xmm1, %xmm0 34530; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34531; SSE3-NEXT: movdqa %xmm0, %xmm2 34532; SSE3-NEXT: pand %xmm1, %xmm2 34533; SSE3-NEXT: psrlw $2, %xmm0 34534; SSE3-NEXT: pand %xmm1, %xmm0 34535; SSE3-NEXT: paddb %xmm2, %xmm0 34536; SSE3-NEXT: movdqa %xmm0, %xmm1 34537; SSE3-NEXT: psrlw $4, %xmm1 34538; SSE3-NEXT: paddb %xmm0, %xmm1 34539; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34540; SSE3-NEXT: pxor %xmm0, %xmm0 34541; SSE3-NEXT: psadbw %xmm1, %xmm0 34542; SSE3-NEXT: por {{.*}}(%rip), %xmm0 34543; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34544; SSE3-NEXT: movdqa %xmm1, %xmm2 34545; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 34546; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34547; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 34548; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34549; SSE3-NEXT: pand %xmm3, %xmm1 34550; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34551; SSE3-NEXT: por %xmm1, %xmm0 34552; SSE3-NEXT: retq 34553; 34554; SSSE3-LABEL: ult_49_v2i64: 34555; SSSE3: # %bb.0: 34556; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34557; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 34558; SSSE3-NEXT: pand %xmm1, %xmm2 34559; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34560; SSSE3-NEXT: movdqa %xmm3, %xmm4 34561; SSSE3-NEXT: pshufb %xmm2, %xmm4 34562; SSSE3-NEXT: psrlw $4, %xmm0 34563; SSSE3-NEXT: pand %xmm1, %xmm0 34564; SSSE3-NEXT: pshufb %xmm0, %xmm3 34565; SSSE3-NEXT: paddb %xmm4, %xmm3 34566; SSSE3-NEXT: pxor %xmm0, %xmm0 34567; SSSE3-NEXT: psadbw %xmm3, %xmm0 34568; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 34569; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34570; SSSE3-NEXT: movdqa %xmm1, %xmm2 34571; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 34572; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34573; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 34574; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34575; SSSE3-NEXT: pand %xmm3, %xmm1 34576; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34577; SSSE3-NEXT: por %xmm1, %xmm0 34578; SSSE3-NEXT: retq 34579; 34580; SSE41-LABEL: ult_49_v2i64: 34581; SSE41: # %bb.0: 34582; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34583; SSE41-NEXT: movdqa %xmm0, %xmm2 34584; SSE41-NEXT: pand %xmm1, %xmm2 34585; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34586; SSE41-NEXT: movdqa %xmm3, %xmm4 34587; SSE41-NEXT: pshufb %xmm2, %xmm4 34588; SSE41-NEXT: psrlw $4, %xmm0 34589; SSE41-NEXT: pand %xmm1, %xmm0 34590; SSE41-NEXT: pshufb %xmm0, %xmm3 34591; SSE41-NEXT: paddb %xmm4, %xmm3 34592; SSE41-NEXT: pxor %xmm0, %xmm0 34593; SSE41-NEXT: psadbw %xmm3, %xmm0 34594; SSE41-NEXT: por {{.*}}(%rip), %xmm0 34595; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34596; SSE41-NEXT: movdqa %xmm1, %xmm2 34597; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 34598; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34599; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 34600; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34601; SSE41-NEXT: pand %xmm3, %xmm1 34602; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34603; 
SSE41-NEXT: por %xmm1, %xmm0 34604; SSE41-NEXT: retq 34605; 34606; AVX1-LABEL: ult_49_v2i64: 34607; AVX1: # %bb.0: 34608; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34609; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 34610; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34611; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34612; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 34613; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 34614; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34615; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34616; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 34617; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34618; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] 34619; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34620; AVX1-NEXT: retq 34621; 34622; AVX2-LABEL: ult_49_v2i64: 34623; AVX2: # %bb.0: 34624; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34625; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 34626; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34627; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34628; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 34629; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 34630; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34631; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34632; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 34633; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34634; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] 34635; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34636; AVX2-NEXT: retq 34637; 34638; AVX512VPOPCNTDQ-LABEL: ult_49_v2i64: 34639; AVX512VPOPCNTDQ: # %bb.0: 34640; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34641; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 34642; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] 34643; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34644; AVX512VPOPCNTDQ-NEXT: vzeroupper 34645; AVX512VPOPCNTDQ-NEXT: retq 34646; 34647; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64: 34648; AVX512VPOPCNTDQVL: # %bb.0: 34649; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 34650; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 34651; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34652; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34653; AVX512VPOPCNTDQVL-NEXT: retq 34654; 34655; BITALG_NOVLX-LABEL: ult_49_v2i64: 34656; BITALG_NOVLX: # %bb.0: 34657; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34658; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 34659; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 34660; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34661; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] 34662; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34663; BITALG_NOVLX-NEXT: vzeroupper 34664; BITALG_NOVLX-NEXT: retq 34665; 34666; BITALG-LABEL: ult_49_v2i64: 34667; BITALG: # %bb.0: 34668; BITALG-NEXT: vpopcntb %xmm0, %xmm0 34669; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 34670; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34671; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 34672; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34673; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34674; BITALG-NEXT: retq 34675 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 34676 %3 = icmp ult <2 x i64> %2, <i64 49, i64 49> 34677 %4 = sext <2 x i1> %3 to <2 x i64> 34678 ret <2 x i64> %4 34679} 34680 34681define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { 34682; SSE2-LABEL: ugt_49_v2i64: 34683; SSE2: # %bb.0: 34684; SSE2-NEXT: movdqa %xmm0, %xmm1 34685; SSE2-NEXT: psrlw $1, %xmm1 34686; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34687; SSE2-NEXT: psubb %xmm1, %xmm0 34688; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34689; SSE2-NEXT: movdqa %xmm0, %xmm2 34690; SSE2-NEXT: pand %xmm1, %xmm2 34691; SSE2-NEXT: psrlw $2, %xmm0 34692; SSE2-NEXT: pand %xmm1, %xmm0 34693; SSE2-NEXT: paddb %xmm2, %xmm0 34694; SSE2-NEXT: movdqa %xmm0, %xmm1 34695; SSE2-NEXT: psrlw $4, %xmm1 34696; SSE2-NEXT: paddb %xmm0, %xmm1 34697; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34698; SSE2-NEXT: pxor %xmm0, 
%xmm0 34699; SSE2-NEXT: psadbw %xmm1, %xmm0 34700; SSE2-NEXT: por {{.*}}(%rip), %xmm0 34701; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34702; SSE2-NEXT: movdqa %xmm0, %xmm2 34703; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 34704; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34705; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 34706; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34707; SSE2-NEXT: pand %xmm3, %xmm1 34708; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34709; SSE2-NEXT: por %xmm1, %xmm0 34710; SSE2-NEXT: retq 34711; 34712; SSE3-LABEL: ugt_49_v2i64: 34713; SSE3: # %bb.0: 34714; SSE3-NEXT: movdqa %xmm0, %xmm1 34715; SSE3-NEXT: psrlw $1, %xmm1 34716; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34717; SSE3-NEXT: psubb %xmm1, %xmm0 34718; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34719; SSE3-NEXT: movdqa %xmm0, %xmm2 34720; SSE3-NEXT: pand %xmm1, %xmm2 34721; SSE3-NEXT: psrlw $2, %xmm0 34722; SSE3-NEXT: pand %xmm1, %xmm0 34723; SSE3-NEXT: paddb %xmm2, %xmm0 34724; SSE3-NEXT: movdqa %xmm0, %xmm1 34725; SSE3-NEXT: psrlw $4, %xmm1 34726; SSE3-NEXT: paddb %xmm0, %xmm1 34727; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34728; SSE3-NEXT: pxor %xmm0, %xmm0 34729; SSE3-NEXT: psadbw %xmm1, %xmm0 34730; SSE3-NEXT: por {{.*}}(%rip), %xmm0 34731; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34732; SSE3-NEXT: movdqa %xmm0, %xmm2 34733; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 34734; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34735; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 34736; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34737; SSE3-NEXT: pand %xmm3, %xmm1 34738; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34739; SSE3-NEXT: por %xmm1, %xmm0 34740; SSE3-NEXT: retq 34741; 34742; SSSE3-LABEL: ugt_49_v2i64: 34743; SSSE3: # %bb.0: 34744; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34745; SSSE3-NEXT: movdqa %xmm0, %xmm2 34746; SSSE3-NEXT: pand %xmm1, %xmm2 34747; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34748; SSSE3-NEXT: movdqa %xmm3, %xmm4 34749; SSSE3-NEXT: pshufb %xmm2, %xmm4 34750; SSSE3-NEXT: psrlw $4, %xmm0 34751; SSSE3-NEXT: pand %xmm1, %xmm0 34752; SSSE3-NEXT: pshufb %xmm0, %xmm3 34753; SSSE3-NEXT: paddb %xmm4, %xmm3 34754; SSSE3-NEXT: pxor %xmm0, %xmm0 34755; SSSE3-NEXT: psadbw %xmm3, %xmm0 34756; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 34757; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34758; SSSE3-NEXT: movdqa %xmm0, %xmm2 34759; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 34760; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34761; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 34762; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34763; SSSE3-NEXT: pand %xmm3, %xmm1 34764; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34765; SSSE3-NEXT: por %xmm1, %xmm0 34766; SSSE3-NEXT: retq 34767; 34768; SSE41-LABEL: ugt_49_v2i64: 34769; SSE41: # %bb.0: 34770; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34771; SSE41-NEXT: movdqa %xmm0, %xmm2 34772; SSE41-NEXT: pand %xmm1, %xmm2 34773; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34774; SSE41-NEXT: movdqa %xmm3, %xmm4 34775; SSE41-NEXT: pshufb %xmm2, %xmm4 34776; SSE41-NEXT: psrlw $4, %xmm0 34777; SSE41-NEXT: pand %xmm1, %xmm0 34778; SSE41-NEXT: pshufb %xmm0, %xmm3 34779; SSE41-NEXT: paddb %xmm4, %xmm3 34780; SSE41-NEXT: pxor %xmm0, %xmm0 34781; SSE41-NEXT: psadbw %xmm3, %xmm0 34782; SSE41-NEXT: por {{.*}}(%rip), %xmm0 34783; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] 34784; SSE41-NEXT: movdqa %xmm0, %xmm2 34785; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 34786; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34787; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 34788; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34789; SSE41-NEXT: pand %xmm3, %xmm1 34790; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34791; SSE41-NEXT: por %xmm1, %xmm0 34792; SSE41-NEXT: retq 34793; 34794; AVX1-LABEL: ugt_49_v2i64: 34795; AVX1: # %bb.0: 
34796; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34797; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 34798; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34799; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34800; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 34801; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 34802; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34803; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34804; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 34805; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34806; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34807; AVX1-NEXT: retq 34808; 34809; AVX2-LABEL: ugt_49_v2i64: 34810; AVX2: # %bb.0: 34811; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34812; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 34813; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34814; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34815; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 34816; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 34817; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34818; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34819; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 34820; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34821; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34822; AVX2-NEXT: retq 34823; 34824; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64: 34825; AVX512VPOPCNTDQ: # %bb.0: 34826; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34827; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 34828; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34829; AVX512VPOPCNTDQ-NEXT: vzeroupper 34830; AVX512VPOPCNTDQ-NEXT: retq 34831; 34832; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64: 34833; AVX512VPOPCNTDQVL: # %bb.0: 34834; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 34835; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 34836; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34837; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34838; AVX512VPOPCNTDQVL-NEXT: retq 34839; 34840; 
BITALG_NOVLX-LABEL: ugt_49_v2i64: 34841; BITALG_NOVLX: # %bb.0: 34842; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 34843; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 34844; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 34845; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34846; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 34847; BITALG_NOVLX-NEXT: vzeroupper 34848; BITALG_NOVLX-NEXT: retq 34849; 34850; BITALG-LABEL: ugt_49_v2i64: 34851; BITALG: # %bb.0: 34852; BITALG-NEXT: vpopcntb %xmm0, %xmm0 34853; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 34854; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34855; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 34856; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 34857; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 34858; BITALG-NEXT: retq 34859 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 34860 %3 = icmp ugt <2 x i64> %2, <i64 49, i64 49> 34861 %4 = sext <2 x i1> %3 to <2 x i64> 34862 ret <2 x i64> %4 34863} 34864 34865define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { 34866; SSE2-LABEL: ult_50_v2i64: 34867; SSE2: # %bb.0: 34868; SSE2-NEXT: movdqa %xmm0, %xmm1 34869; SSE2-NEXT: psrlw $1, %xmm1 34870; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34871; SSE2-NEXT: psubb %xmm1, %xmm0 34872; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34873; SSE2-NEXT: movdqa %xmm0, %xmm2 34874; SSE2-NEXT: pand %xmm1, %xmm2 34875; SSE2-NEXT: psrlw $2, %xmm0 34876; SSE2-NEXT: pand %xmm1, %xmm0 34877; SSE2-NEXT: paddb %xmm2, %xmm0 34878; SSE2-NEXT: movdqa %xmm0, %xmm1 34879; SSE2-NEXT: psrlw $4, %xmm1 34880; SSE2-NEXT: paddb %xmm0, %xmm1 34881; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 34882; SSE2-NEXT: pxor %xmm0, %xmm0 34883; SSE2-NEXT: psadbw %xmm1, %xmm0 34884; SSE2-NEXT: por {{.*}}(%rip), %xmm0 34885; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] 34886; SSE2-NEXT: movdqa %xmm1, %xmm2 34887; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 34888; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34889; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 34890; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34891; SSE2-NEXT: pand %xmm3, %xmm1 34892; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34893; SSE2-NEXT: por %xmm1, %xmm0 34894; SSE2-NEXT: retq 34895; 34896; SSE3-LABEL: ult_50_v2i64: 34897; SSE3: # %bb.0: 34898; SSE3-NEXT: movdqa %xmm0, %xmm1 34899; SSE3-NEXT: psrlw $1, %xmm1 34900; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34901; SSE3-NEXT: psubb %xmm1, %xmm0 34902; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 34903; SSE3-NEXT: movdqa %xmm0, %xmm2 34904; SSE3-NEXT: pand %xmm1, %xmm2 34905; SSE3-NEXT: psrlw $2, %xmm0 34906; SSE3-NEXT: pand %xmm1, %xmm0 34907; SSE3-NEXT: paddb %xmm2, %xmm0 34908; SSE3-NEXT: movdqa %xmm0, %xmm1 34909; SSE3-NEXT: psrlw $4, %xmm1 34910; SSE3-NEXT: paddb %xmm0, %xmm1 34911; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 34912; SSE3-NEXT: pxor %xmm0, %xmm0 34913; SSE3-NEXT: psadbw %xmm1, %xmm0 34914; SSE3-NEXT: por {{.*}}(%rip), %xmm0 34915; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] 34916; SSE3-NEXT: movdqa %xmm1, %xmm2 34917; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 34918; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34919; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 34920; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34921; SSE3-NEXT: pand %xmm3, %xmm1 34922; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34923; SSE3-NEXT: por %xmm1, %xmm0 34924; SSE3-NEXT: retq 34925; 34926; SSSE3-LABEL: ult_50_v2i64: 34927; SSSE3: # %bb.0: 34928; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34929; SSSE3-NEXT: movdqa %xmm0, %xmm2 34930; SSSE3-NEXT: pand %xmm1, %xmm2 34931; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34932; SSSE3-NEXT: movdqa %xmm3, %xmm4 34933; SSSE3-NEXT: pshufb %xmm2, %xmm4 34934; SSSE3-NEXT: psrlw $4, %xmm0 34935; SSSE3-NEXT: pand %xmm1, %xmm0 34936; SSSE3-NEXT: pshufb %xmm0, %xmm3 34937; SSSE3-NEXT: paddb %xmm4, %xmm3 34938; SSSE3-NEXT: pxor %xmm0, 
%xmm0 34939; SSSE3-NEXT: psadbw %xmm3, %xmm0 34940; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 34941; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] 34942; SSSE3-NEXT: movdqa %xmm1, %xmm2 34943; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 34944; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34945; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 34946; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34947; SSSE3-NEXT: pand %xmm3, %xmm1 34948; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34949; SSSE3-NEXT: por %xmm1, %xmm0 34950; SSSE3-NEXT: retq 34951; 34952; SSE41-LABEL: ult_50_v2i64: 34953; SSE41: # %bb.0: 34954; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34955; SSE41-NEXT: movdqa %xmm0, %xmm2 34956; SSE41-NEXT: pand %xmm1, %xmm2 34957; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34958; SSE41-NEXT: movdqa %xmm3, %xmm4 34959; SSE41-NEXT: pshufb %xmm2, %xmm4 34960; SSE41-NEXT: psrlw $4, %xmm0 34961; SSE41-NEXT: pand %xmm1, %xmm0 34962; SSE41-NEXT: pshufb %xmm0, %xmm3 34963; SSE41-NEXT: paddb %xmm4, %xmm3 34964; SSE41-NEXT: pxor %xmm0, %xmm0 34965; SSE41-NEXT: psadbw %xmm3, %xmm0 34966; SSE41-NEXT: por {{.*}}(%rip), %xmm0 34967; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] 34968; SSE41-NEXT: movdqa %xmm1, %xmm2 34969; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 34970; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 34971; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 34972; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 34973; SSE41-NEXT: pand %xmm3, %xmm1 34974; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 34975; SSE41-NEXT: por %xmm1, %xmm0 34976; SSE41-NEXT: retq 34977; 34978; AVX1-LABEL: ult_50_v2i64: 34979; AVX1: # %bb.0: 34980; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34981; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 34982; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34983; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 34984; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 34985; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 34986; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 34987; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 34988; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 34989; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 34990; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] 34991; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 34992; AVX1-NEXT: retq 34993; 34994; AVX2-LABEL: ult_50_v2i64: 34995; AVX2: # %bb.0: 34996; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 34997; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 34998; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 34999; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35000; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 35001; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 35002; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35003; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35004; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 35005; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35006; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] 35007; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35008; AVX2-NEXT: retq 35009; 35010; AVX512VPOPCNTDQ-LABEL: ult_50_v2i64: 35011; AVX512VPOPCNTDQ: # %bb.0: 35012; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35013; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 35014; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] 35015; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35016; AVX512VPOPCNTDQ-NEXT: vzeroupper 35017; AVX512VPOPCNTDQ-NEXT: retq 35018; 35019; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64: 35020; AVX512VPOPCNTDQVL: # %bb.0: 35021; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 35022; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 35023; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35024; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35025; AVX512VPOPCNTDQVL-NEXT: retq 35026; 35027; BITALG_NOVLX-LABEL: ult_50_v2i64: 35028; BITALG_NOVLX: # %bb.0: 35029; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35030; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 35031; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 35032; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35033; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] 35034; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35035; BITALG_NOVLX-NEXT: vzeroupper 35036; BITALG_NOVLX-NEXT: retq 35037; 35038; BITALG-LABEL: ult_50_v2i64: 35039; BITALG: # %bb.0: 35040; BITALG-NEXT: vpopcntb %xmm0, %xmm0 35041; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 35042; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35043; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 35044; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35045; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35046; BITALG-NEXT: retq 35047 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 35048 %3 = icmp ult <2 x i64> %2, <i64 50, i64 50> 35049 %4 = sext <2 x i1> %3 to <2 x i64> 35050 ret <2 x i64> %4 35051} 35052 35053define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { 35054; SSE2-LABEL: ugt_50_v2i64: 35055; SSE2: # %bb.0: 35056; SSE2-NEXT: movdqa %xmm0, %xmm1 35057; SSE2-NEXT: psrlw $1, %xmm1 35058; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35059; SSE2-NEXT: psubb %xmm1, %xmm0 35060; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35061; SSE2-NEXT: movdqa %xmm0, %xmm2 35062; SSE2-NEXT: pand %xmm1, %xmm2 35063; SSE2-NEXT: psrlw $2, %xmm0 35064; SSE2-NEXT: pand %xmm1, %xmm0 35065; SSE2-NEXT: paddb %xmm2, %xmm0 35066; SSE2-NEXT: movdqa %xmm0, %xmm1 35067; SSE2-NEXT: psrlw $4, %xmm1 35068; SSE2-NEXT: paddb %xmm0, %xmm1 35069; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35070; SSE2-NEXT: pxor %xmm0, %xmm0 35071; SSE2-NEXT: psadbw %xmm1, %xmm0 35072; SSE2-NEXT: por {{.*}}(%rip), %xmm0 35073; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] 35074; SSE2-NEXT: movdqa %xmm0, %xmm2 35075; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 35076; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35077; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 35078; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35079; SSE2-NEXT: pand 
%xmm3, %xmm1 35080; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35081; SSE2-NEXT: por %xmm1, %xmm0 35082; SSE2-NEXT: retq 35083; 35084; SSE3-LABEL: ugt_50_v2i64: 35085; SSE3: # %bb.0: 35086; SSE3-NEXT: movdqa %xmm0, %xmm1 35087; SSE3-NEXT: psrlw $1, %xmm1 35088; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35089; SSE3-NEXT: psubb %xmm1, %xmm0 35090; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35091; SSE3-NEXT: movdqa %xmm0, %xmm2 35092; SSE3-NEXT: pand %xmm1, %xmm2 35093; SSE3-NEXT: psrlw $2, %xmm0 35094; SSE3-NEXT: pand %xmm1, %xmm0 35095; SSE3-NEXT: paddb %xmm2, %xmm0 35096; SSE3-NEXT: movdqa %xmm0, %xmm1 35097; SSE3-NEXT: psrlw $4, %xmm1 35098; SSE3-NEXT: paddb %xmm0, %xmm1 35099; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35100; SSE3-NEXT: pxor %xmm0, %xmm0 35101; SSE3-NEXT: psadbw %xmm1, %xmm0 35102; SSE3-NEXT: por {{.*}}(%rip), %xmm0 35103; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] 35104; SSE3-NEXT: movdqa %xmm0, %xmm2 35105; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 35106; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35107; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 35108; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35109; SSE3-NEXT: pand %xmm3, %xmm1 35110; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35111; SSE3-NEXT: por %xmm1, %xmm0 35112; SSE3-NEXT: retq 35113; 35114; SSSE3-LABEL: ugt_50_v2i64: 35115; SSSE3: # %bb.0: 35116; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35117; SSSE3-NEXT: movdqa %xmm0, %xmm2 35118; SSSE3-NEXT: pand %xmm1, %xmm2 35119; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35120; SSSE3-NEXT: movdqa %xmm3, %xmm4 35121; SSSE3-NEXT: pshufb %xmm2, %xmm4 35122; SSSE3-NEXT: psrlw $4, %xmm0 35123; SSSE3-NEXT: pand %xmm1, %xmm0 35124; SSSE3-NEXT: pshufb %xmm0, %xmm3 35125; SSSE3-NEXT: paddb %xmm4, %xmm3 35126; SSSE3-NEXT: pxor %xmm0, %xmm0 35127; SSSE3-NEXT: psadbw %xmm3, %xmm0 35128; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 35129; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483698,2147483698] 35130; SSSE3-NEXT: movdqa %xmm0, %xmm2 35131; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 35132; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35133; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 35134; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35135; SSSE3-NEXT: pand %xmm3, %xmm1 35136; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35137; SSSE3-NEXT: por %xmm1, %xmm0 35138; SSSE3-NEXT: retq 35139; 35140; SSE41-LABEL: ugt_50_v2i64: 35141; SSE41: # %bb.0: 35142; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35143; SSE41-NEXT: movdqa %xmm0, %xmm2 35144; SSE41-NEXT: pand %xmm1, %xmm2 35145; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35146; SSE41-NEXT: movdqa %xmm3, %xmm4 35147; SSE41-NEXT: pshufb %xmm2, %xmm4 35148; SSE41-NEXT: psrlw $4, %xmm0 35149; SSE41-NEXT: pand %xmm1, %xmm0 35150; SSE41-NEXT: pshufb %xmm0, %xmm3 35151; SSE41-NEXT: paddb %xmm4, %xmm3 35152; SSE41-NEXT: pxor %xmm0, %xmm0 35153; SSE41-NEXT: psadbw %xmm3, %xmm0 35154; SSE41-NEXT: por {{.*}}(%rip), %xmm0 35155; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] 35156; SSE41-NEXT: movdqa %xmm0, %xmm2 35157; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 35158; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35159; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 35160; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35161; SSE41-NEXT: pand %xmm3, %xmm1 35162; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35163; SSE41-NEXT: por %xmm1, %xmm0 35164; SSE41-NEXT: retq 35165; 35166; AVX1-LABEL: ugt_50_v2i64: 35167; AVX1: # %bb.0: 35168; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35169; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 35170; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35171; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35172; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 35173; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 35174; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35175; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 35176; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 35177; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35178; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35179; AVX1-NEXT: retq 35180; 35181; AVX2-LABEL: ugt_50_v2i64: 35182; AVX2: # %bb.0: 35183; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35184; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 35185; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35186; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35187; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 35188; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 35189; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35190; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35191; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 35192; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35193; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35194; AVX2-NEXT: retq 35195; 35196; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64: 35197; AVX512VPOPCNTDQ: # %bb.0: 35198; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35199; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 35200; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35201; AVX512VPOPCNTDQ-NEXT: vzeroupper 35202; AVX512VPOPCNTDQ-NEXT: retq 35203; 35204; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64: 35205; AVX512VPOPCNTDQVL: # %bb.0: 35206; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 35207; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 35208; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35209; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35210; AVX512VPOPCNTDQVL-NEXT: retq 35211; 35212; BITALG_NOVLX-LABEL: ugt_50_v2i64: 35213; BITALG_NOVLX: # %bb.0: 35214; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35215; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 35216; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 35217; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35218; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35219; BITALG_NOVLX-NEXT: vzeroupper 35220; BITALG_NOVLX-NEXT: retq 
35221; 35222; BITALG-LABEL: ugt_50_v2i64: 35223; BITALG: # %bb.0: 35224; BITALG-NEXT: vpopcntb %xmm0, %xmm0 35225; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 35226; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35227; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 35228; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35229; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35230; BITALG-NEXT: retq 35231 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 35232 %3 = icmp ugt <2 x i64> %2, <i64 50, i64 50> 35233 %4 = sext <2 x i1> %3 to <2 x i64> 35234 ret <2 x i64> %4 35235} 35236 35237define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { 35238; SSE2-LABEL: ult_51_v2i64: 35239; SSE2: # %bb.0: 35240; SSE2-NEXT: movdqa %xmm0, %xmm1 35241; SSE2-NEXT: psrlw $1, %xmm1 35242; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35243; SSE2-NEXT: psubb %xmm1, %xmm0 35244; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35245; SSE2-NEXT: movdqa %xmm0, %xmm2 35246; SSE2-NEXT: pand %xmm1, %xmm2 35247; SSE2-NEXT: psrlw $2, %xmm0 35248; SSE2-NEXT: pand %xmm1, %xmm0 35249; SSE2-NEXT: paddb %xmm2, %xmm0 35250; SSE2-NEXT: movdqa %xmm0, %xmm1 35251; SSE2-NEXT: psrlw $4, %xmm1 35252; SSE2-NEXT: paddb %xmm0, %xmm1 35253; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35254; SSE2-NEXT: pxor %xmm0, %xmm0 35255; SSE2-NEXT: psadbw %xmm1, %xmm0 35256; SSE2-NEXT: por {{.*}}(%rip), %xmm0 35257; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35258; SSE2-NEXT: movdqa %xmm1, %xmm2 35259; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 35260; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35261; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 35262; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35263; SSE2-NEXT: pand %xmm3, %xmm1 35264; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35265; SSE2-NEXT: por %xmm1, %xmm0 35266; SSE2-NEXT: retq 35267; 35268; SSE3-LABEL: ult_51_v2i64: 35269; SSE3: # %bb.0: 35270; SSE3-NEXT: movdqa %xmm0, %xmm1 35271; SSE3-NEXT: psrlw $1, %xmm1 35272; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35273; 
SSE3-NEXT: psubb %xmm1, %xmm0 35274; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35275; SSE3-NEXT: movdqa %xmm0, %xmm2 35276; SSE3-NEXT: pand %xmm1, %xmm2 35277; SSE3-NEXT: psrlw $2, %xmm0 35278; SSE3-NEXT: pand %xmm1, %xmm0 35279; SSE3-NEXT: paddb %xmm2, %xmm0 35280; SSE3-NEXT: movdqa %xmm0, %xmm1 35281; SSE3-NEXT: psrlw $4, %xmm1 35282; SSE3-NEXT: paddb %xmm0, %xmm1 35283; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35284; SSE3-NEXT: pxor %xmm0, %xmm0 35285; SSE3-NEXT: psadbw %xmm1, %xmm0 35286; SSE3-NEXT: por {{.*}}(%rip), %xmm0 35287; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35288; SSE3-NEXT: movdqa %xmm1, %xmm2 35289; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 35290; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35291; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 35292; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35293; SSE3-NEXT: pand %xmm3, %xmm1 35294; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35295; SSE3-NEXT: por %xmm1, %xmm0 35296; SSE3-NEXT: retq 35297; 35298; SSSE3-LABEL: ult_51_v2i64: 35299; SSSE3: # %bb.0: 35300; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35301; SSSE3-NEXT: movdqa %xmm0, %xmm2 35302; SSSE3-NEXT: pand %xmm1, %xmm2 35303; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35304; SSSE3-NEXT: movdqa %xmm3, %xmm4 35305; SSSE3-NEXT: pshufb %xmm2, %xmm4 35306; SSSE3-NEXT: psrlw $4, %xmm0 35307; SSSE3-NEXT: pand %xmm1, %xmm0 35308; SSSE3-NEXT: pshufb %xmm0, %xmm3 35309; SSSE3-NEXT: paddb %xmm4, %xmm3 35310; SSSE3-NEXT: pxor %xmm0, %xmm0 35311; SSSE3-NEXT: psadbw %xmm3, %xmm0 35312; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 35313; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35314; SSSE3-NEXT: movdqa %xmm1, %xmm2 35315; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 35316; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35317; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 35318; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35319; SSSE3-NEXT: pand %xmm3, %xmm1 
35320; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35321; SSSE3-NEXT: por %xmm1, %xmm0 35322; SSSE3-NEXT: retq 35323; 35324; SSE41-LABEL: ult_51_v2i64: 35325; SSE41: # %bb.0: 35326; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35327; SSE41-NEXT: movdqa %xmm0, %xmm2 35328; SSE41-NEXT: pand %xmm1, %xmm2 35329; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35330; SSE41-NEXT: movdqa %xmm3, %xmm4 35331; SSE41-NEXT: pshufb %xmm2, %xmm4 35332; SSE41-NEXT: psrlw $4, %xmm0 35333; SSE41-NEXT: pand %xmm1, %xmm0 35334; SSE41-NEXT: pshufb %xmm0, %xmm3 35335; SSE41-NEXT: paddb %xmm4, %xmm3 35336; SSE41-NEXT: pxor %xmm0, %xmm0 35337; SSE41-NEXT: psadbw %xmm3, %xmm0 35338; SSE41-NEXT: por {{.*}}(%rip), %xmm0 35339; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35340; SSE41-NEXT: movdqa %xmm1, %xmm2 35341; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 35342; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35343; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 35344; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35345; SSE41-NEXT: pand %xmm3, %xmm1 35346; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35347; SSE41-NEXT: por %xmm1, %xmm0 35348; SSE41-NEXT: retq 35349; 35350; AVX1-LABEL: ult_51_v2i64: 35351; AVX1: # %bb.0: 35352; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35353; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 35354; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35355; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35356; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 35357; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 35358; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35359; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35360; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 35361; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35362; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] 35363; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35364; AVX1-NEXT: retq 35365; 35366; AVX2-LABEL: ult_51_v2i64: 35367; AVX2: # %bb.0: 35368; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35369; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 35370; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35371; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35372; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 35373; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 35374; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35375; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35376; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 35377; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35378; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] 35379; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35380; AVX2-NEXT: retq 35381; 35382; AVX512VPOPCNTDQ-LABEL: ult_51_v2i64: 35383; AVX512VPOPCNTDQ: # %bb.0: 35384; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35385; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 35386; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] 35387; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35388; AVX512VPOPCNTDQ-NEXT: vzeroupper 35389; AVX512VPOPCNTDQ-NEXT: retq 35390; 35391; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64: 35392; AVX512VPOPCNTDQVL: # %bb.0: 35393; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 35394; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 35395; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35396; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35397; AVX512VPOPCNTDQVL-NEXT: retq 35398; 35399; BITALG_NOVLX-LABEL: ult_51_v2i64: 35400; BITALG_NOVLX: # %bb.0: 35401; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35402; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 35403; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 35404; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35405; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] 35406; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35407; BITALG_NOVLX-NEXT: vzeroupper 35408; BITALG_NOVLX-NEXT: retq 35409; 35410; BITALG-LABEL: ult_51_v2i64: 35411; BITALG: # %bb.0: 35412; BITALG-NEXT: vpopcntb %xmm0, %xmm0 35413; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 35414; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35415; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 35416; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35417; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35418; BITALG-NEXT: retq 35419 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 35420 %3 = icmp ult <2 x i64> %2, <i64 51, i64 51> 35421 %4 = sext <2 x i1> %3 to <2 x i64> 35422 ret <2 x i64> %4 35423} 35424 35425define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { 35426; SSE2-LABEL: ugt_51_v2i64: 35427; SSE2: # %bb.0: 35428; SSE2-NEXT: movdqa %xmm0, %xmm1 35429; SSE2-NEXT: psrlw $1, %xmm1 35430; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35431; SSE2-NEXT: psubb %xmm1, %xmm0 35432; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35433; SSE2-NEXT: movdqa %xmm0, %xmm2 35434; SSE2-NEXT: pand %xmm1, %xmm2 35435; SSE2-NEXT: psrlw $2, %xmm0 35436; SSE2-NEXT: pand %xmm1, %xmm0 35437; SSE2-NEXT: paddb %xmm2, %xmm0 35438; SSE2-NEXT: movdqa %xmm0, %xmm1 35439; SSE2-NEXT: psrlw $4, %xmm1 35440; SSE2-NEXT: paddb %xmm0, %xmm1 35441; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35442; SSE2-NEXT: pxor %xmm0, %xmm0 35443; SSE2-NEXT: psadbw %xmm1, %xmm0 35444; SSE2-NEXT: por {{.*}}(%rip), %xmm0 35445; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35446; SSE2-NEXT: movdqa %xmm0, %xmm2 35447; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 35448; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35449; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 35450; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35451; SSE2-NEXT: pand %xmm3, %xmm1 35452; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35453; SSE2-NEXT: por %xmm1, %xmm0 35454; SSE2-NEXT: retq 35455; 35456; SSE3-LABEL: ugt_51_v2i64: 35457; SSE3: # %bb.0: 35458; SSE3-NEXT: movdqa %xmm0, %xmm1 35459; SSE3-NEXT: psrlw $1, %xmm1 35460; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35461; SSE3-NEXT: psubb %xmm1, %xmm0 35462; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35463; SSE3-NEXT: movdqa %xmm0, %xmm2 35464; SSE3-NEXT: pand %xmm1, %xmm2 35465; SSE3-NEXT: psrlw $2, %xmm0 35466; SSE3-NEXT: pand %xmm1, %xmm0 35467; SSE3-NEXT: paddb %xmm2, %xmm0 35468; SSE3-NEXT: movdqa %xmm0, %xmm1 35469; SSE3-NEXT: psrlw $4, %xmm1 35470; SSE3-NEXT: paddb %xmm0, %xmm1 35471; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35472; SSE3-NEXT: pxor %xmm0, %xmm0 35473; SSE3-NEXT: psadbw %xmm1, %xmm0 35474; SSE3-NEXT: por {{.*}}(%rip), %xmm0 35475; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35476; SSE3-NEXT: movdqa %xmm0, %xmm2 35477; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 35478; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35479; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 35480; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35481; SSE3-NEXT: pand %xmm3, %xmm1 35482; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35483; SSE3-NEXT: por %xmm1, %xmm0 35484; SSE3-NEXT: retq 35485; 35486; SSSE3-LABEL: ugt_51_v2i64: 35487; SSSE3: # %bb.0: 35488; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35489; SSSE3-NEXT: movdqa %xmm0, %xmm2 35490; SSSE3-NEXT: pand %xmm1, %xmm2 35491; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35492; SSSE3-NEXT: movdqa %xmm3, %xmm4 35493; SSSE3-NEXT: pshufb %xmm2, %xmm4 35494; SSSE3-NEXT: psrlw $4, %xmm0 35495; SSSE3-NEXT: pand %xmm1, %xmm0 35496; SSSE3-NEXT: pshufb %xmm0, %xmm3 35497; SSSE3-NEXT: paddb %xmm4, %xmm3 35498; SSSE3-NEXT: pxor %xmm0, %xmm0 35499; SSSE3-NEXT: psadbw %xmm3, %xmm0 35500; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 35501; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35502; SSSE3-NEXT: movdqa %xmm0, %xmm2 35503; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 35504; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35505; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 35506; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35507; SSSE3-NEXT: pand %xmm3, %xmm1 35508; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35509; 
SSSE3-NEXT: por %xmm1, %xmm0 35510; SSSE3-NEXT: retq 35511; 35512; SSE41-LABEL: ugt_51_v2i64: 35513; SSE41: # %bb.0: 35514; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35515; SSE41-NEXT: movdqa %xmm0, %xmm2 35516; SSE41-NEXT: pand %xmm1, %xmm2 35517; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35518; SSE41-NEXT: movdqa %xmm3, %xmm4 35519; SSE41-NEXT: pshufb %xmm2, %xmm4 35520; SSE41-NEXT: psrlw $4, %xmm0 35521; SSE41-NEXT: pand %xmm1, %xmm0 35522; SSE41-NEXT: pshufb %xmm0, %xmm3 35523; SSE41-NEXT: paddb %xmm4, %xmm3 35524; SSE41-NEXT: pxor %xmm0, %xmm0 35525; SSE41-NEXT: psadbw %xmm3, %xmm0 35526; SSE41-NEXT: por {{.*}}(%rip), %xmm0 35527; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] 35528; SSE41-NEXT: movdqa %xmm0, %xmm2 35529; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 35530; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35531; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 35532; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35533; SSE41-NEXT: pand %xmm3, %xmm1 35534; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35535; SSE41-NEXT: por %xmm1, %xmm0 35536; SSE41-NEXT: retq 35537; 35538; AVX1-LABEL: ugt_51_v2i64: 35539; AVX1: # %bb.0: 35540; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35541; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 35542; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35543; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35544; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 35545; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 35546; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35547; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35548; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 35549; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35550; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35551; AVX1-NEXT: retq 35552; 35553; AVX2-LABEL: ugt_51_v2i64: 35554; AVX2: # %bb.0: 35555; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35556; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 35557; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35558; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35559; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 35560; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 35561; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35562; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35563; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 35564; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35565; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35566; AVX2-NEXT: retq 35567; 35568; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64: 35569; AVX512VPOPCNTDQ: # %bb.0: 35570; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35571; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 35572; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35573; AVX512VPOPCNTDQ-NEXT: vzeroupper 35574; AVX512VPOPCNTDQ-NEXT: retq 35575; 35576; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64: 35577; AVX512VPOPCNTDQVL: # %bb.0: 35578; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 35579; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 35580; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35581; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35582; AVX512VPOPCNTDQVL-NEXT: retq 35583; 35584; BITALG_NOVLX-LABEL: ugt_51_v2i64: 35585; BITALG_NOVLX: # %bb.0: 35586; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35587; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 35588; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 35589; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35590; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35591; BITALG_NOVLX-NEXT: vzeroupper 35592; BITALG_NOVLX-NEXT: retq 35593; 35594; BITALG-LABEL: ugt_51_v2i64: 35595; BITALG: # %bb.0: 35596; BITALG-NEXT: vpopcntb %xmm0, %xmm0 35597; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 35598; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35599; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 35600; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35601; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35602; 
BITALG-NEXT: retq 35603 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 35604 %3 = icmp ugt <2 x i64> %2, <i64 51, i64 51> 35605 %4 = sext <2 x i1> %3 to <2 x i64> 35606 ret <2 x i64> %4 35607} 35608 35609define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { 35610; SSE2-LABEL: ult_52_v2i64: 35611; SSE2: # %bb.0: 35612; SSE2-NEXT: movdqa %xmm0, %xmm1 35613; SSE2-NEXT: psrlw $1, %xmm1 35614; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35615; SSE2-NEXT: psubb %xmm1, %xmm0 35616; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35617; SSE2-NEXT: movdqa %xmm0, %xmm2 35618; SSE2-NEXT: pand %xmm1, %xmm2 35619; SSE2-NEXT: psrlw $2, %xmm0 35620; SSE2-NEXT: pand %xmm1, %xmm0 35621; SSE2-NEXT: paddb %xmm2, %xmm0 35622; SSE2-NEXT: movdqa %xmm0, %xmm1 35623; SSE2-NEXT: psrlw $4, %xmm1 35624; SSE2-NEXT: paddb %xmm0, %xmm1 35625; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35626; SSE2-NEXT: pxor %xmm0, %xmm0 35627; SSE2-NEXT: psadbw %xmm1, %xmm0 35628; SSE2-NEXT: por {{.*}}(%rip), %xmm0 35629; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35630; SSE2-NEXT: movdqa %xmm1, %xmm2 35631; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 35632; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35633; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 35634; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35635; SSE2-NEXT: pand %xmm3, %xmm1 35636; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35637; SSE2-NEXT: por %xmm1, %xmm0 35638; SSE2-NEXT: retq 35639; 35640; SSE3-LABEL: ult_52_v2i64: 35641; SSE3: # %bb.0: 35642; SSE3-NEXT: movdqa %xmm0, %xmm1 35643; SSE3-NEXT: psrlw $1, %xmm1 35644; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35645; SSE3-NEXT: psubb %xmm1, %xmm0 35646; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35647; SSE3-NEXT: movdqa %xmm0, %xmm2 35648; SSE3-NEXT: pand %xmm1, %xmm2 35649; SSE3-NEXT: psrlw $2, %xmm0 35650; SSE3-NEXT: pand %xmm1, %xmm0 35651; SSE3-NEXT: paddb %xmm2, %xmm0 35652; SSE3-NEXT: movdqa %xmm0, %xmm1 35653; SSE3-NEXT: psrlw $4, 
%xmm1 35654; SSE3-NEXT: paddb %xmm0, %xmm1 35655; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35656; SSE3-NEXT: pxor %xmm0, %xmm0 35657; SSE3-NEXT: psadbw %xmm1, %xmm0 35658; SSE3-NEXT: por {{.*}}(%rip), %xmm0 35659; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35660; SSE3-NEXT: movdqa %xmm1, %xmm2 35661; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 35662; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35663; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 35664; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35665; SSE3-NEXT: pand %xmm3, %xmm1 35666; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35667; SSE3-NEXT: por %xmm1, %xmm0 35668; SSE3-NEXT: retq 35669; 35670; SSSE3-LABEL: ult_52_v2i64: 35671; SSSE3: # %bb.0: 35672; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35673; SSSE3-NEXT: movdqa %xmm0, %xmm2 35674; SSSE3-NEXT: pand %xmm1, %xmm2 35675; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35676; SSSE3-NEXT: movdqa %xmm3, %xmm4 35677; SSSE3-NEXT: pshufb %xmm2, %xmm4 35678; SSSE3-NEXT: psrlw $4, %xmm0 35679; SSSE3-NEXT: pand %xmm1, %xmm0 35680; SSSE3-NEXT: pshufb %xmm0, %xmm3 35681; SSSE3-NEXT: paddb %xmm4, %xmm3 35682; SSSE3-NEXT: pxor %xmm0, %xmm0 35683; SSSE3-NEXT: psadbw %xmm3, %xmm0 35684; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 35685; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35686; SSSE3-NEXT: movdqa %xmm1, %xmm2 35687; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 35688; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35689; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 35690; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35691; SSSE3-NEXT: pand %xmm3, %xmm1 35692; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35693; SSSE3-NEXT: por %xmm1, %xmm0 35694; SSSE3-NEXT: retq 35695; 35696; SSE41-LABEL: ult_52_v2i64: 35697; SSE41: # %bb.0: 35698; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35699; SSE41-NEXT: movdqa %xmm0, %xmm2 35700; SSE41-NEXT: pand %xmm1, %xmm2 35701; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35702; SSE41-NEXT: movdqa %xmm3, %xmm4 35703; SSE41-NEXT: pshufb %xmm2, %xmm4 35704; SSE41-NEXT: psrlw $4, %xmm0 35705; SSE41-NEXT: pand %xmm1, %xmm0 35706; SSE41-NEXT: pshufb %xmm0, %xmm3 35707; SSE41-NEXT: paddb %xmm4, %xmm3 35708; SSE41-NEXT: pxor %xmm0, %xmm0 35709; SSE41-NEXT: psadbw %xmm3, %xmm0 35710; SSE41-NEXT: por {{.*}}(%rip), %xmm0 35711; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35712; SSE41-NEXT: movdqa %xmm1, %xmm2 35713; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 35714; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35715; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 35716; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35717; SSE41-NEXT: pand %xmm3, %xmm1 35718; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35719; SSE41-NEXT: por %xmm1, %xmm0 35720; SSE41-NEXT: retq 35721; 35722; AVX1-LABEL: ult_52_v2i64: 35723; AVX1: # %bb.0: 35724; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35725; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 35726; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35727; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35728; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 35729; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 35730; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35731; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35732; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 35733; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35734; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] 35735; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35736; AVX1-NEXT: retq 35737; 35738; AVX2-LABEL: ult_52_v2i64: 35739; AVX2: # %bb.0: 35740; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35741; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 35742; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35743; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35744; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 35745; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 35746; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 35747; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35748; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 35749; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35750; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] 35751; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35752; AVX2-NEXT: retq 35753; 35754; AVX512VPOPCNTDQ-LABEL: ult_52_v2i64: 35755; AVX512VPOPCNTDQ: # %bb.0: 35756; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35757; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 35758; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] 35759; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35760; AVX512VPOPCNTDQ-NEXT: vzeroupper 35761; AVX512VPOPCNTDQ-NEXT: retq 35762; 35763; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64: 35764; AVX512VPOPCNTDQVL: # %bb.0: 35765; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 35766; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 35767; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35768; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35769; AVX512VPOPCNTDQVL-NEXT: retq 35770; 35771; BITALG_NOVLX-LABEL: ult_52_v2i64: 35772; BITALG_NOVLX: # %bb.0: 35773; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35774; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 35775; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 35776; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35777; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] 35778; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 35779; BITALG_NOVLX-NEXT: vzeroupper 35780; BITALG_NOVLX-NEXT: retq 35781; 35782; BITALG-LABEL: ult_52_v2i64: 35783; BITALG: # %bb.0: 35784; BITALG-NEXT: vpopcntb %xmm0, %xmm0 35785; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 35786; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35787; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 35788; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35789; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35790; BITALG-NEXT: retq 35791 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 35792 %3 = icmp ult <2 x i64> %2, <i64 
52, i64 52> 35793 %4 = sext <2 x i1> %3 to <2 x i64> 35794 ret <2 x i64> %4 35795} 35796 35797define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { 35798; SSE2-LABEL: ugt_52_v2i64: 35799; SSE2: # %bb.0: 35800; SSE2-NEXT: movdqa %xmm0, %xmm1 35801; SSE2-NEXT: psrlw $1, %xmm1 35802; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35803; SSE2-NEXT: psubb %xmm1, %xmm0 35804; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35805; SSE2-NEXT: movdqa %xmm0, %xmm2 35806; SSE2-NEXT: pand %xmm1, %xmm2 35807; SSE2-NEXT: psrlw $2, %xmm0 35808; SSE2-NEXT: pand %xmm1, %xmm0 35809; SSE2-NEXT: paddb %xmm2, %xmm0 35810; SSE2-NEXT: movdqa %xmm0, %xmm1 35811; SSE2-NEXT: psrlw $4, %xmm1 35812; SSE2-NEXT: paddb %xmm0, %xmm1 35813; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35814; SSE2-NEXT: pxor %xmm0, %xmm0 35815; SSE2-NEXT: psadbw %xmm1, %xmm0 35816; SSE2-NEXT: por {{.*}}(%rip), %xmm0 35817; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35818; SSE2-NEXT: movdqa %xmm0, %xmm2 35819; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 35820; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35821; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 35822; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35823; SSE2-NEXT: pand %xmm3, %xmm1 35824; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35825; SSE2-NEXT: por %xmm1, %xmm0 35826; SSE2-NEXT: retq 35827; 35828; SSE3-LABEL: ugt_52_v2i64: 35829; SSE3: # %bb.0: 35830; SSE3-NEXT: movdqa %xmm0, %xmm1 35831; SSE3-NEXT: psrlw $1, %xmm1 35832; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35833; SSE3-NEXT: psubb %xmm1, %xmm0 35834; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35835; SSE3-NEXT: movdqa %xmm0, %xmm2 35836; SSE3-NEXT: pand %xmm1, %xmm2 35837; SSE3-NEXT: psrlw $2, %xmm0 35838; SSE3-NEXT: pand %xmm1, %xmm0 35839; SSE3-NEXT: paddb %xmm2, %xmm0 35840; SSE3-NEXT: movdqa %xmm0, %xmm1 35841; SSE3-NEXT: psrlw $4, %xmm1 35842; SSE3-NEXT: paddb %xmm0, %xmm1 35843; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 35844; SSE3-NEXT: pxor %xmm0, 
%xmm0 35845; SSE3-NEXT: psadbw %xmm1, %xmm0 35846; SSE3-NEXT: por {{.*}}(%rip), %xmm0 35847; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35848; SSE3-NEXT: movdqa %xmm0, %xmm2 35849; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 35850; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35851; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 35852; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35853; SSE3-NEXT: pand %xmm3, %xmm1 35854; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35855; SSE3-NEXT: por %xmm1, %xmm0 35856; SSE3-NEXT: retq 35857; 35858; SSSE3-LABEL: ugt_52_v2i64: 35859; SSSE3: # %bb.0: 35860; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35861; SSSE3-NEXT: movdqa %xmm0, %xmm2 35862; SSSE3-NEXT: pand %xmm1, %xmm2 35863; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35864; SSSE3-NEXT: movdqa %xmm3, %xmm4 35865; SSSE3-NEXT: pshufb %xmm2, %xmm4 35866; SSSE3-NEXT: psrlw $4, %xmm0 35867; SSSE3-NEXT: pand %xmm1, %xmm0 35868; SSSE3-NEXT: pshufb %xmm0, %xmm3 35869; SSSE3-NEXT: paddb %xmm4, %xmm3 35870; SSSE3-NEXT: pxor %xmm0, %xmm0 35871; SSSE3-NEXT: psadbw %xmm3, %xmm0 35872; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 35873; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35874; SSSE3-NEXT: movdqa %xmm0, %xmm2 35875; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 35876; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35877; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 35878; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35879; SSSE3-NEXT: pand %xmm3, %xmm1 35880; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35881; SSSE3-NEXT: por %xmm1, %xmm0 35882; SSSE3-NEXT: retq 35883; 35884; SSE41-LABEL: ugt_52_v2i64: 35885; SSE41: # %bb.0: 35886; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35887; SSE41-NEXT: movdqa %xmm0, %xmm2 35888; SSE41-NEXT: pand %xmm1, %xmm2 35889; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35890; SSE41-NEXT: movdqa %xmm3, %xmm4 35891; SSE41-NEXT: 
pshufb %xmm2, %xmm4 35892; SSE41-NEXT: psrlw $4, %xmm0 35893; SSE41-NEXT: pand %xmm1, %xmm0 35894; SSE41-NEXT: pshufb %xmm0, %xmm3 35895; SSE41-NEXT: paddb %xmm4, %xmm3 35896; SSE41-NEXT: pxor %xmm0, %xmm0 35897; SSE41-NEXT: psadbw %xmm3, %xmm0 35898; SSE41-NEXT: por {{.*}}(%rip), %xmm0 35899; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] 35900; SSE41-NEXT: movdqa %xmm0, %xmm2 35901; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 35902; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 35903; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 35904; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 35905; SSE41-NEXT: pand %xmm3, %xmm1 35906; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 35907; SSE41-NEXT: por %xmm1, %xmm0 35908; SSE41-NEXT: retq 35909; 35910; AVX1-LABEL: ugt_52_v2i64: 35911; AVX1: # %bb.0: 35912; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35913; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 35914; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35915; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35916; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 35917; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 35918; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35919; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35920; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 35921; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35922; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35923; AVX1-NEXT: retq 35924; 35925; AVX2-LABEL: ugt_52_v2i64: 35926; AVX2: # %bb.0: 35927; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 35928; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 35929; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35930; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 35931; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 35932; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 35933; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 35934; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 35935; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 35936; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35937; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35938; AVX2-NEXT: retq 35939; 35940; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64: 35941; AVX512VPOPCNTDQ: # %bb.0: 35942; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35943; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 35944; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35945; AVX512VPOPCNTDQ-NEXT: vzeroupper 35946; AVX512VPOPCNTDQ-NEXT: retq 35947; 35948; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64: 35949; AVX512VPOPCNTDQVL: # %bb.0: 35950; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 35951; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 35952; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35953; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35954; AVX512VPOPCNTDQVL-NEXT: retq 35955; 35956; BITALG_NOVLX-LABEL: ugt_52_v2i64: 35957; BITALG_NOVLX: # %bb.0: 35958; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 35959; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 35960; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 35961; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35962; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 35963; BITALG_NOVLX-NEXT: vzeroupper 35964; BITALG_NOVLX-NEXT: retq 35965; 35966; BITALG-LABEL: ugt_52_v2i64: 35967; BITALG: # %bb.0: 35968; BITALG-NEXT: vpopcntb %xmm0, %xmm0 35969; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 35970; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 35971; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 35972; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 35973; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 35974; BITALG-NEXT: retq 35975 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 35976 %3 = icmp ugt <2 x i64> %2, <i64 52, i64 52> 35977 %4 = sext <2 x i1> %3 to <2 x i64> 35978 ret <2 x i64> %4 35979} 35980 35981define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { 35982; SSE2-LABEL: ult_53_v2i64: 35983; SSE2: # %bb.0: 35984; SSE2-NEXT: movdqa %xmm0, %xmm1 35985; SSE2-NEXT: psrlw $1, %xmm1 35986; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 35987; SSE2-NEXT: psubb %xmm1, %xmm0 35988; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 35989; SSE2-NEXT: movdqa %xmm0, %xmm2 35990; SSE2-NEXT: pand %xmm1, %xmm2 35991; SSE2-NEXT: psrlw $2, %xmm0 35992; SSE2-NEXT: pand %xmm1, %xmm0 35993; SSE2-NEXT: paddb %xmm2, %xmm0 35994; SSE2-NEXT: movdqa %xmm0, %xmm1 35995; SSE2-NEXT: psrlw $4, %xmm1 35996; SSE2-NEXT: paddb %xmm0, %xmm1 35997; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 35998; SSE2-NEXT: pxor %xmm0, %xmm0 35999; SSE2-NEXT: psadbw %xmm1, %xmm0 36000; SSE2-NEXT: por {{.*}}(%rip), %xmm0 36001; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36002; SSE2-NEXT: movdqa %xmm1, %xmm2 36003; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 36004; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36005; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 36006; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36007; SSE2-NEXT: pand %xmm3, %xmm1 36008; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36009; SSE2-NEXT: por %xmm1, %xmm0 36010; SSE2-NEXT: retq 36011; 36012; SSE3-LABEL: ult_53_v2i64: 36013; SSE3: # %bb.0: 36014; SSE3-NEXT: movdqa %xmm0, %xmm1 36015; SSE3-NEXT: psrlw $1, %xmm1 36016; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36017; SSE3-NEXT: psubb %xmm1, %xmm0 36018; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36019; SSE3-NEXT: movdqa %xmm0, %xmm2 36020; SSE3-NEXT: pand %xmm1, %xmm2 36021; SSE3-NEXT: psrlw $2, %xmm0 36022; SSE3-NEXT: pand %xmm1, %xmm0 36023; SSE3-NEXT: paddb %xmm2, %xmm0 36024; SSE3-NEXT: movdqa %xmm0, %xmm1 36025; SSE3-NEXT: psrlw $4, %xmm1 36026; SSE3-NEXT: paddb %xmm0, %xmm1 36027; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36028; SSE3-NEXT: pxor %xmm0, %xmm0 36029; SSE3-NEXT: psadbw %xmm1, %xmm0 36030; SSE3-NEXT: por {{.*}}(%rip), %xmm0 36031; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36032; SSE3-NEXT: movdqa %xmm1, %xmm2 36033; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 36034; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36035; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 36036; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36037; SSE3-NEXT: pand %xmm3, %xmm1 36038; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36039; SSE3-NEXT: por %xmm1, %xmm0 36040; SSE3-NEXT: retq 36041; 36042; SSSE3-LABEL: ult_53_v2i64: 36043; SSSE3: # %bb.0: 36044; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36045; SSSE3-NEXT: movdqa %xmm0, %xmm2 36046; SSSE3-NEXT: pand %xmm1, %xmm2 36047; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36048; SSSE3-NEXT: movdqa %xmm3, %xmm4 36049; SSSE3-NEXT: pshufb %xmm2, %xmm4 36050; SSSE3-NEXT: psrlw $4, %xmm0 36051; SSSE3-NEXT: pand %xmm1, %xmm0 36052; SSSE3-NEXT: pshufb %xmm0, %xmm3 36053; SSSE3-NEXT: paddb %xmm4, %xmm3 36054; SSSE3-NEXT: pxor %xmm0, %xmm0 36055; SSSE3-NEXT: psadbw %xmm3, %xmm0 36056; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 36057; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36058; SSSE3-NEXT: movdqa %xmm1, %xmm2 36059; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 36060; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36061; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 36062; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36063; SSSE3-NEXT: pand %xmm3, %xmm1 36064; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36065; SSSE3-NEXT: por %xmm1, %xmm0 36066; SSSE3-NEXT: retq 36067; 36068; SSE41-LABEL: ult_53_v2i64: 36069; SSE41: # %bb.0: 36070; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36071; SSE41-NEXT: movdqa %xmm0, %xmm2 36072; SSE41-NEXT: pand %xmm1, %xmm2 36073; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36074; SSE41-NEXT: movdqa %xmm3, %xmm4 36075; SSE41-NEXT: pshufb %xmm2, %xmm4 36076; SSE41-NEXT: psrlw $4, %xmm0 36077; SSE41-NEXT: pand %xmm1, %xmm0 36078; SSE41-NEXT: pshufb %xmm0, %xmm3 36079; SSE41-NEXT: paddb %xmm4, %xmm3 36080; SSE41-NEXT: pxor %xmm0, %xmm0 36081; SSE41-NEXT: psadbw %xmm3, %xmm0 36082; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
36083; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36084; SSE41-NEXT: movdqa %xmm1, %xmm2 36085; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 36086; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36087; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 36088; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36089; SSE41-NEXT: pand %xmm3, %xmm1 36090; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36091; SSE41-NEXT: por %xmm1, %xmm0 36092; SSE41-NEXT: retq 36093; 36094; AVX1-LABEL: ult_53_v2i64: 36095; AVX1: # %bb.0: 36096; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36097; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 36098; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36099; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36100; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 36101; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 36102; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36103; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36104; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 36105; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36106; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] 36107; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36108; AVX1-NEXT: retq 36109; 36110; AVX2-LABEL: ult_53_v2i64: 36111; AVX2: # %bb.0: 36112; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36113; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 36114; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36115; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36116; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 36117; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 36118; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36119; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36120; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 36121; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36122; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] 36123; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36124; AVX2-NEXT: retq 36125; 36126; AVX512VPOPCNTDQ-LABEL: ult_53_v2i64: 36127; AVX512VPOPCNTDQ: # %bb.0: 36128; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 36129; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 36130; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] 36131; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36132; AVX512VPOPCNTDQ-NEXT: vzeroupper 36133; AVX512VPOPCNTDQ-NEXT: retq 36134; 36135; AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64: 36136; AVX512VPOPCNTDQVL: # %bb.0: 36137; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 36138; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 36139; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36140; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36141; AVX512VPOPCNTDQVL-NEXT: retq 36142; 36143; BITALG_NOVLX-LABEL: ult_53_v2i64: 36144; BITALG_NOVLX: # %bb.0: 36145; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36146; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 36147; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 36148; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36149; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] 36150; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36151; BITALG_NOVLX-NEXT: vzeroupper 36152; BITALG_NOVLX-NEXT: retq 36153; 36154; BITALG-LABEL: ult_53_v2i64: 36155; BITALG: # %bb.0: 36156; BITALG-NEXT: vpopcntb %xmm0, %xmm0 36157; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 36158; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36159; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 36160; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36161; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36162; BITALG-NEXT: retq 36163 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 36164 %3 = icmp ult <2 x i64> %2, <i64 53, i64 53> 36165 %4 = sext <2 x i1> %3 to <2 x i64> 36166 ret <2 x i64> %4 36167} 36168 36169define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { 36170; SSE2-LABEL: ugt_53_v2i64: 36171; SSE2: # %bb.0: 36172; SSE2-NEXT: movdqa %xmm0, %xmm1 36173; SSE2-NEXT: psrlw $1, %xmm1 36174; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36175; SSE2-NEXT: psubb %xmm1, %xmm0 36176; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36177; SSE2-NEXT: movdqa %xmm0, %xmm2 36178; SSE2-NEXT: pand %xmm1, %xmm2 36179; SSE2-NEXT: psrlw $2, %xmm0 36180; SSE2-NEXT: pand %xmm1, %xmm0 36181; SSE2-NEXT: paddb %xmm2, %xmm0 36182; SSE2-NEXT: movdqa %xmm0, %xmm1 36183; SSE2-NEXT: psrlw $4, %xmm1 36184; SSE2-NEXT: paddb %xmm0, %xmm1 36185; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36186; SSE2-NEXT: pxor %xmm0, %xmm0 36187; SSE2-NEXT: psadbw %xmm1, %xmm0 36188; SSE2-NEXT: por {{.*}}(%rip), %xmm0 36189; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36190; SSE2-NEXT: movdqa %xmm0, %xmm2 36191; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 36192; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36193; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 36194; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36195; SSE2-NEXT: pand %xmm3, %xmm1 36196; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36197; SSE2-NEXT: por %xmm1, %xmm0 36198; SSE2-NEXT: retq 36199; 36200; SSE3-LABEL: ugt_53_v2i64: 36201; SSE3: # %bb.0: 36202; SSE3-NEXT: movdqa %xmm0, %xmm1 36203; SSE3-NEXT: psrlw $1, %xmm1 36204; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36205; SSE3-NEXT: psubb %xmm1, %xmm0 36206; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36207; SSE3-NEXT: movdqa %xmm0, %xmm2 36208; SSE3-NEXT: pand %xmm1, %xmm2 36209; SSE3-NEXT: psrlw $2, %xmm0 36210; SSE3-NEXT: pand %xmm1, %xmm0 36211; SSE3-NEXT: paddb %xmm2, %xmm0 36212; SSE3-NEXT: movdqa %xmm0, %xmm1 36213; SSE3-NEXT: psrlw $4, %xmm1 36214; SSE3-NEXT: paddb %xmm0, %xmm1 36215; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36216; SSE3-NEXT: pxor %xmm0, %xmm0 36217; SSE3-NEXT: psadbw %xmm1, %xmm0 36218; SSE3-NEXT: por {{.*}}(%rip), %xmm0 36219; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36220; SSE3-NEXT: movdqa %xmm0, %xmm2 36221; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 36222; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36223; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 36224; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36225; 
SSE3-NEXT: pand %xmm3, %xmm1 36226; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36227; SSE3-NEXT: por %xmm1, %xmm0 36228; SSE3-NEXT: retq 36229; 36230; SSSE3-LABEL: ugt_53_v2i64: 36231; SSSE3: # %bb.0: 36232; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36233; SSSE3-NEXT: movdqa %xmm0, %xmm2 36234; SSSE3-NEXT: pand %xmm1, %xmm2 36235; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36236; SSSE3-NEXT: movdqa %xmm3, %xmm4 36237; SSSE3-NEXT: pshufb %xmm2, %xmm4 36238; SSSE3-NEXT: psrlw $4, %xmm0 36239; SSSE3-NEXT: pand %xmm1, %xmm0 36240; SSSE3-NEXT: pshufb %xmm0, %xmm3 36241; SSSE3-NEXT: paddb %xmm4, %xmm3 36242; SSSE3-NEXT: pxor %xmm0, %xmm0 36243; SSSE3-NEXT: psadbw %xmm3, %xmm0 36244; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 36245; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36246; SSSE3-NEXT: movdqa %xmm0, %xmm2 36247; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 36248; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36249; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 36250; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36251; SSSE3-NEXT: pand %xmm3, %xmm1 36252; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36253; SSSE3-NEXT: por %xmm1, %xmm0 36254; SSSE3-NEXT: retq 36255; 36256; SSE41-LABEL: ugt_53_v2i64: 36257; SSE41: # %bb.0: 36258; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36259; SSE41-NEXT: movdqa %xmm0, %xmm2 36260; SSE41-NEXT: pand %xmm1, %xmm2 36261; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36262; SSE41-NEXT: movdqa %xmm3, %xmm4 36263; SSE41-NEXT: pshufb %xmm2, %xmm4 36264; SSE41-NEXT: psrlw $4, %xmm0 36265; SSE41-NEXT: pand %xmm1, %xmm0 36266; SSE41-NEXT: pshufb %xmm0, %xmm3 36267; SSE41-NEXT: paddb %xmm4, %xmm3 36268; SSE41-NEXT: pxor %xmm0, %xmm0 36269; SSE41-NEXT: psadbw %xmm3, %xmm0 36270; SSE41-NEXT: por {{.*}}(%rip), %xmm0 36271; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] 36272; SSE41-NEXT: movdqa %xmm0, 
%xmm2 36273; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 36274; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36275; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 36276; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36277; SSE41-NEXT: pand %xmm3, %xmm1 36278; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36279; SSE41-NEXT: por %xmm1, %xmm0 36280; SSE41-NEXT: retq 36281; 36282; AVX1-LABEL: ugt_53_v2i64: 36283; AVX1: # %bb.0: 36284; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36285; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 36286; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36287; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36288; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 36289; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 36290; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36291; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36292; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 36293; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36294; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36295; AVX1-NEXT: retq 36296; 36297; AVX2-LABEL: ugt_53_v2i64: 36298; AVX2: # %bb.0: 36299; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36300; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 36301; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36302; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36303; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 36304; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 36305; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36306; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36307; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 36308; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36309; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36310; AVX2-NEXT: retq 36311; 36312; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64: 36313; AVX512VPOPCNTDQ: # %bb.0: 36314; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36315; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 36316; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36317; AVX512VPOPCNTDQ-NEXT: vzeroupper 36318; 
AVX512VPOPCNTDQ-NEXT: retq 36319; 36320; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64: 36321; AVX512VPOPCNTDQVL: # %bb.0: 36322; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 36323; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 36324; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36325; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36326; AVX512VPOPCNTDQVL-NEXT: retq 36327; 36328; BITALG_NOVLX-LABEL: ugt_53_v2i64: 36329; BITALG_NOVLX: # %bb.0: 36330; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36331; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 36332; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 36333; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36334; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36335; BITALG_NOVLX-NEXT: vzeroupper 36336; BITALG_NOVLX-NEXT: retq 36337; 36338; BITALG-LABEL: ugt_53_v2i64: 36339; BITALG: # %bb.0: 36340; BITALG-NEXT: vpopcntb %xmm0, %xmm0 36341; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 36342; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36343; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 36344; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36345; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36346; BITALG-NEXT: retq 36347 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 36348 %3 = icmp ugt <2 x i64> %2, <i64 53, i64 53> 36349 %4 = sext <2 x i1> %3 to <2 x i64> 36350 ret <2 x i64> %4 36351} 36352 36353define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { 36354; SSE2-LABEL: ult_54_v2i64: 36355; SSE2: # %bb.0: 36356; SSE2-NEXT: movdqa %xmm0, %xmm1 36357; SSE2-NEXT: psrlw $1, %xmm1 36358; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36359; SSE2-NEXT: psubb %xmm1, %xmm0 36360; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36361; SSE2-NEXT: movdqa %xmm0, %xmm2 36362; SSE2-NEXT: pand %xmm1, %xmm2 36363; SSE2-NEXT: psrlw $2, %xmm0 36364; SSE2-NEXT: pand %xmm1, %xmm0 36365; SSE2-NEXT: paddb %xmm2, %xmm0 36366; SSE2-NEXT: movdqa %xmm0, %xmm1 36367; SSE2-NEXT: psrlw $4, 
%xmm1 36368; SSE2-NEXT: paddb %xmm0, %xmm1 36369; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36370; SSE2-NEXT: pxor %xmm0, %xmm0 36371; SSE2-NEXT: psadbw %xmm1, %xmm0 36372; SSE2-NEXT: por {{.*}}(%rip), %xmm0 36373; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36374; SSE2-NEXT: movdqa %xmm1, %xmm2 36375; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 36376; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36377; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 36378; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36379; SSE2-NEXT: pand %xmm3, %xmm1 36380; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36381; SSE2-NEXT: por %xmm1, %xmm0 36382; SSE2-NEXT: retq 36383; 36384; SSE3-LABEL: ult_54_v2i64: 36385; SSE3: # %bb.0: 36386; SSE3-NEXT: movdqa %xmm0, %xmm1 36387; SSE3-NEXT: psrlw $1, %xmm1 36388; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36389; SSE3-NEXT: psubb %xmm1, %xmm0 36390; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36391; SSE3-NEXT: movdqa %xmm0, %xmm2 36392; SSE3-NEXT: pand %xmm1, %xmm2 36393; SSE3-NEXT: psrlw $2, %xmm0 36394; SSE3-NEXT: pand %xmm1, %xmm0 36395; SSE3-NEXT: paddb %xmm2, %xmm0 36396; SSE3-NEXT: movdqa %xmm0, %xmm1 36397; SSE3-NEXT: psrlw $4, %xmm1 36398; SSE3-NEXT: paddb %xmm0, %xmm1 36399; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36400; SSE3-NEXT: pxor %xmm0, %xmm0 36401; SSE3-NEXT: psadbw %xmm1, %xmm0 36402; SSE3-NEXT: por {{.*}}(%rip), %xmm0 36403; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36404; SSE3-NEXT: movdqa %xmm1, %xmm2 36405; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 36406; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36407; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 36408; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36409; SSE3-NEXT: pand %xmm3, %xmm1 36410; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36411; SSE3-NEXT: por %xmm1, %xmm0 36412; SSE3-NEXT: retq 36413; 36414; SSSE3-LABEL: ult_54_v2i64: 36415; SSSE3: # %bb.0: 36416; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36417; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 36418; SSSE3-NEXT: pand %xmm1, %xmm2 36419; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36420; SSSE3-NEXT: movdqa %xmm3, %xmm4 36421; SSSE3-NEXT: pshufb %xmm2, %xmm4 36422; SSSE3-NEXT: psrlw $4, %xmm0 36423; SSSE3-NEXT: pand %xmm1, %xmm0 36424; SSSE3-NEXT: pshufb %xmm0, %xmm3 36425; SSSE3-NEXT: paddb %xmm4, %xmm3 36426; SSSE3-NEXT: pxor %xmm0, %xmm0 36427; SSSE3-NEXT: psadbw %xmm3, %xmm0 36428; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 36429; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36430; SSSE3-NEXT: movdqa %xmm1, %xmm2 36431; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 36432; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36433; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 36434; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36435; SSSE3-NEXT: pand %xmm3, %xmm1 36436; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36437; SSSE3-NEXT: por %xmm1, %xmm0 36438; SSSE3-NEXT: retq 36439; 36440; SSE41-LABEL: ult_54_v2i64: 36441; SSE41: # %bb.0: 36442; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36443; SSE41-NEXT: movdqa %xmm0, %xmm2 36444; SSE41-NEXT: pand %xmm1, %xmm2 36445; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36446; SSE41-NEXT: movdqa %xmm3, %xmm4 36447; SSE41-NEXT: pshufb %xmm2, %xmm4 36448; SSE41-NEXT: psrlw $4, %xmm0 36449; SSE41-NEXT: pand %xmm1, %xmm0 36450; SSE41-NEXT: pshufb %xmm0, %xmm3 36451; SSE41-NEXT: paddb %xmm4, %xmm3 36452; SSE41-NEXT: pxor %xmm0, %xmm0 36453; SSE41-NEXT: psadbw %xmm3, %xmm0 36454; SSE41-NEXT: por {{.*}}(%rip), %xmm0 36455; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36456; SSE41-NEXT: movdqa %xmm1, %xmm2 36457; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 36458; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36459; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 36460; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36461; SSE41-NEXT: pand %xmm3, %xmm1 36462; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36463; 
SSE41-NEXT: por %xmm1, %xmm0 36464; SSE41-NEXT: retq 36465; 36466; AVX1-LABEL: ult_54_v2i64: 36467; AVX1: # %bb.0: 36468; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36469; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 36470; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36471; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36472; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 36473; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 36474; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36475; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36476; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 36477; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36478; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] 36479; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36480; AVX1-NEXT: retq 36481; 36482; AVX2-LABEL: ult_54_v2i64: 36483; AVX2: # %bb.0: 36484; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36485; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 36486; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36487; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36488; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 36489; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 36490; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36491; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36492; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 36493; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36494; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] 36495; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36496; AVX2-NEXT: retq 36497; 36498; AVX512VPOPCNTDQ-LABEL: ult_54_v2i64: 36499; AVX512VPOPCNTDQ: # %bb.0: 36500; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36501; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 36502; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] 36503; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36504; AVX512VPOPCNTDQ-NEXT: vzeroupper 36505; AVX512VPOPCNTDQ-NEXT: retq 36506; 36507; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64: 36508; AVX512VPOPCNTDQVL: # %bb.0: 36509; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 36510; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 36511; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36512; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36513; AVX512VPOPCNTDQVL-NEXT: retq 36514; 36515; BITALG_NOVLX-LABEL: ult_54_v2i64: 36516; BITALG_NOVLX: # %bb.0: 36517; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36518; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 36519; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 36520; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36521; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] 36522; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36523; BITALG_NOVLX-NEXT: vzeroupper 36524; BITALG_NOVLX-NEXT: retq 36525; 36526; BITALG-LABEL: ult_54_v2i64: 36527; BITALG: # %bb.0: 36528; BITALG-NEXT: vpopcntb %xmm0, %xmm0 36529; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 36530; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36531; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 36532; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36533; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36534; BITALG-NEXT: retq 36535 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 36536 %3 = icmp ult <2 x i64> %2, <i64 54, i64 54> 36537 %4 = sext <2 x i1> %3 to <2 x i64> 36538 ret <2 x i64> %4 36539} 36540 36541define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { 36542; SSE2-LABEL: ugt_54_v2i64: 36543; SSE2: # %bb.0: 36544; SSE2-NEXT: movdqa %xmm0, %xmm1 36545; SSE2-NEXT: psrlw $1, %xmm1 36546; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36547; SSE2-NEXT: psubb %xmm1, %xmm0 36548; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36549; SSE2-NEXT: movdqa %xmm0, %xmm2 36550; SSE2-NEXT: pand %xmm1, %xmm2 36551; SSE2-NEXT: psrlw $2, %xmm0 36552; SSE2-NEXT: pand %xmm1, %xmm0 36553; SSE2-NEXT: paddb %xmm2, %xmm0 36554; SSE2-NEXT: movdqa %xmm0, %xmm1 36555; SSE2-NEXT: psrlw $4, %xmm1 36556; SSE2-NEXT: paddb %xmm0, %xmm1 36557; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36558; SSE2-NEXT: pxor %xmm0, 
%xmm0 36559; SSE2-NEXT: psadbw %xmm1, %xmm0 36560; SSE2-NEXT: por {{.*}}(%rip), %xmm0 36561; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36562; SSE2-NEXT: movdqa %xmm0, %xmm2 36563; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 36564; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36565; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 36566; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36567; SSE2-NEXT: pand %xmm3, %xmm1 36568; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36569; SSE2-NEXT: por %xmm1, %xmm0 36570; SSE2-NEXT: retq 36571; 36572; SSE3-LABEL: ugt_54_v2i64: 36573; SSE3: # %bb.0: 36574; SSE3-NEXT: movdqa %xmm0, %xmm1 36575; SSE3-NEXT: psrlw $1, %xmm1 36576; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36577; SSE3-NEXT: psubb %xmm1, %xmm0 36578; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36579; SSE3-NEXT: movdqa %xmm0, %xmm2 36580; SSE3-NEXT: pand %xmm1, %xmm2 36581; SSE3-NEXT: psrlw $2, %xmm0 36582; SSE3-NEXT: pand %xmm1, %xmm0 36583; SSE3-NEXT: paddb %xmm2, %xmm0 36584; SSE3-NEXT: movdqa %xmm0, %xmm1 36585; SSE3-NEXT: psrlw $4, %xmm1 36586; SSE3-NEXT: paddb %xmm0, %xmm1 36587; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36588; SSE3-NEXT: pxor %xmm0, %xmm0 36589; SSE3-NEXT: psadbw %xmm1, %xmm0 36590; SSE3-NEXT: por {{.*}}(%rip), %xmm0 36591; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36592; SSE3-NEXT: movdqa %xmm0, %xmm2 36593; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 36594; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36595; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 36596; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36597; SSE3-NEXT: pand %xmm3, %xmm1 36598; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36599; SSE3-NEXT: por %xmm1, %xmm0 36600; SSE3-NEXT: retq 36601; 36602; SSSE3-LABEL: ugt_54_v2i64: 36603; SSSE3: # %bb.0: 36604; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36605; SSSE3-NEXT: movdqa %xmm0, %xmm2 36606; SSSE3-NEXT: pand %xmm1, %xmm2 36607; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36608; SSSE3-NEXT: movdqa %xmm3, %xmm4 36609; SSSE3-NEXT: pshufb %xmm2, %xmm4 36610; SSSE3-NEXT: psrlw $4, %xmm0 36611; SSSE3-NEXT: pand %xmm1, %xmm0 36612; SSSE3-NEXT: pshufb %xmm0, %xmm3 36613; SSSE3-NEXT: paddb %xmm4, %xmm3 36614; SSSE3-NEXT: pxor %xmm0, %xmm0 36615; SSSE3-NEXT: psadbw %xmm3, %xmm0 36616; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 36617; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36618; SSSE3-NEXT: movdqa %xmm0, %xmm2 36619; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 36620; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36621; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 36622; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36623; SSSE3-NEXT: pand %xmm3, %xmm1 36624; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36625; SSSE3-NEXT: por %xmm1, %xmm0 36626; SSSE3-NEXT: retq 36627; 36628; SSE41-LABEL: ugt_54_v2i64: 36629; SSE41: # %bb.0: 36630; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36631; SSE41-NEXT: movdqa %xmm0, %xmm2 36632; SSE41-NEXT: pand %xmm1, %xmm2 36633; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36634; SSE41-NEXT: movdqa %xmm3, %xmm4 36635; SSE41-NEXT: pshufb %xmm2, %xmm4 36636; SSE41-NEXT: psrlw $4, %xmm0 36637; SSE41-NEXT: pand %xmm1, %xmm0 36638; SSE41-NEXT: pshufb %xmm0, %xmm3 36639; SSE41-NEXT: paddb %xmm4, %xmm3 36640; SSE41-NEXT: pxor %xmm0, %xmm0 36641; SSE41-NEXT: psadbw %xmm3, %xmm0 36642; SSE41-NEXT: por {{.*}}(%rip), %xmm0 36643; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] 36644; SSE41-NEXT: movdqa %xmm0, %xmm2 36645; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 36646; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36647; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 36648; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36649; SSE41-NEXT: pand %xmm3, %xmm1 36650; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36651; SSE41-NEXT: por %xmm1, %xmm0 36652; SSE41-NEXT: retq 36653; 36654; AVX1-LABEL: ugt_54_v2i64: 36655; AVX1: # %bb.0: 
36656; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36657; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 36658; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36659; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36660; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 36661; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 36662; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36663; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36664; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 36665; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36666; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36667; AVX1-NEXT: retq 36668; 36669; AVX2-LABEL: ugt_54_v2i64: 36670; AVX2: # %bb.0: 36671; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36672; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 36673; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36674; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36675; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 36676; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 36677; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36678; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36679; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 36680; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36681; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36682; AVX2-NEXT: retq 36683; 36684; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64: 36685; AVX512VPOPCNTDQ: # %bb.0: 36686; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36687; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 36688; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36689; AVX512VPOPCNTDQ-NEXT: vzeroupper 36690; AVX512VPOPCNTDQ-NEXT: retq 36691; 36692; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64: 36693; AVX512VPOPCNTDQVL: # %bb.0: 36694; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 36695; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 36696; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36697; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36698; AVX512VPOPCNTDQVL-NEXT: retq 36699; 36700; 
BITALG_NOVLX-LABEL: ugt_54_v2i64: 36701; BITALG_NOVLX: # %bb.0: 36702; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36703; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 36704; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 36705; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36706; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 36707; BITALG_NOVLX-NEXT: vzeroupper 36708; BITALG_NOVLX-NEXT: retq 36709; 36710; BITALG-LABEL: ugt_54_v2i64: 36711; BITALG: # %bb.0: 36712; BITALG-NEXT: vpopcntb %xmm0, %xmm0 36713; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 36714; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36715; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 36716; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36717; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36718; BITALG-NEXT: retq 36719 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 36720 %3 = icmp ugt <2 x i64> %2, <i64 54, i64 54> 36721 %4 = sext <2 x i1> %3 to <2 x i64> 36722 ret <2 x i64> %4 36723} 36724 36725define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { 36726; SSE2-LABEL: ult_55_v2i64: 36727; SSE2: # %bb.0: 36728; SSE2-NEXT: movdqa %xmm0, %xmm1 36729; SSE2-NEXT: psrlw $1, %xmm1 36730; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36731; SSE2-NEXT: psubb %xmm1, %xmm0 36732; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36733; SSE2-NEXT: movdqa %xmm0, %xmm2 36734; SSE2-NEXT: pand %xmm1, %xmm2 36735; SSE2-NEXT: psrlw $2, %xmm0 36736; SSE2-NEXT: pand %xmm1, %xmm0 36737; SSE2-NEXT: paddb %xmm2, %xmm0 36738; SSE2-NEXT: movdqa %xmm0, %xmm1 36739; SSE2-NEXT: psrlw $4, %xmm1 36740; SSE2-NEXT: paddb %xmm0, %xmm1 36741; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36742; SSE2-NEXT: pxor %xmm0, %xmm0 36743; SSE2-NEXT: psadbw %xmm1, %xmm0 36744; SSE2-NEXT: por {{.*}}(%rip), %xmm0 36745; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] 36746; SSE2-NEXT: movdqa %xmm1, %xmm2 36747; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 36748; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36749; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 36750; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36751; SSE2-NEXT: pand %xmm3, %xmm1 36752; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36753; SSE2-NEXT: por %xmm1, %xmm0 36754; SSE2-NEXT: retq 36755; 36756; SSE3-LABEL: ult_55_v2i64: 36757; SSE3: # %bb.0: 36758; SSE3-NEXT: movdqa %xmm0, %xmm1 36759; SSE3-NEXT: psrlw $1, %xmm1 36760; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36761; SSE3-NEXT: psubb %xmm1, %xmm0 36762; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36763; SSE3-NEXT: movdqa %xmm0, %xmm2 36764; SSE3-NEXT: pand %xmm1, %xmm2 36765; SSE3-NEXT: psrlw $2, %xmm0 36766; SSE3-NEXT: pand %xmm1, %xmm0 36767; SSE3-NEXT: paddb %xmm2, %xmm0 36768; SSE3-NEXT: movdqa %xmm0, %xmm1 36769; SSE3-NEXT: psrlw $4, %xmm1 36770; SSE3-NEXT: paddb %xmm0, %xmm1 36771; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36772; SSE3-NEXT: pxor %xmm0, %xmm0 36773; SSE3-NEXT: psadbw %xmm1, %xmm0 36774; SSE3-NEXT: por {{.*}}(%rip), %xmm0 36775; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] 36776; SSE3-NEXT: movdqa %xmm1, %xmm2 36777; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 36778; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36779; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 36780; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36781; SSE3-NEXT: pand %xmm3, %xmm1 36782; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36783; SSE3-NEXT: por %xmm1, %xmm0 36784; SSE3-NEXT: retq 36785; 36786; SSSE3-LABEL: ult_55_v2i64: 36787; SSSE3: # %bb.0: 36788; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36789; SSSE3-NEXT: movdqa %xmm0, %xmm2 36790; SSSE3-NEXT: pand %xmm1, %xmm2 36791; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36792; SSSE3-NEXT: movdqa %xmm3, %xmm4 36793; SSSE3-NEXT: pshufb %xmm2, %xmm4 36794; SSSE3-NEXT: psrlw $4, %xmm0 36795; SSSE3-NEXT: pand %xmm1, %xmm0 36796; SSSE3-NEXT: pshufb %xmm0, %xmm3 36797; SSSE3-NEXT: paddb %xmm4, %xmm3 36798; SSSE3-NEXT: pxor %xmm0, 
%xmm0 36799; SSSE3-NEXT: psadbw %xmm3, %xmm0 36800; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 36801; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] 36802; SSSE3-NEXT: movdqa %xmm1, %xmm2 36803; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 36804; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36805; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 36806; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36807; SSSE3-NEXT: pand %xmm3, %xmm1 36808; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36809; SSSE3-NEXT: por %xmm1, %xmm0 36810; SSSE3-NEXT: retq 36811; 36812; SSE41-LABEL: ult_55_v2i64: 36813; SSE41: # %bb.0: 36814; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36815; SSE41-NEXT: movdqa %xmm0, %xmm2 36816; SSE41-NEXT: pand %xmm1, %xmm2 36817; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36818; SSE41-NEXT: movdqa %xmm3, %xmm4 36819; SSE41-NEXT: pshufb %xmm2, %xmm4 36820; SSE41-NEXT: psrlw $4, %xmm0 36821; SSE41-NEXT: pand %xmm1, %xmm0 36822; SSE41-NEXT: pshufb %xmm0, %xmm3 36823; SSE41-NEXT: paddb %xmm4, %xmm3 36824; SSE41-NEXT: pxor %xmm0, %xmm0 36825; SSE41-NEXT: psadbw %xmm3, %xmm0 36826; SSE41-NEXT: por {{.*}}(%rip), %xmm0 36827; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] 36828; SSE41-NEXT: movdqa %xmm1, %xmm2 36829; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 36830; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36831; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 36832; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36833; SSE41-NEXT: pand %xmm3, %xmm1 36834; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36835; SSE41-NEXT: por %xmm1, %xmm0 36836; SSE41-NEXT: retq 36837; 36838; AVX1-LABEL: ult_55_v2i64: 36839; AVX1: # %bb.0: 36840; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36841; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 36842; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36843; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36844; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 36845; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 36846; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36847; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36848; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 36849; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36850; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] 36851; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36852; AVX1-NEXT: retq 36853; 36854; AVX2-LABEL: ult_55_v2i64: 36855; AVX2: # %bb.0: 36856; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36857; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 36858; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36859; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 36860; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 36861; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 36862; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 36863; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 36864; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 36865; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36866; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] 36867; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36868; AVX2-NEXT: retq 36869; 36870; AVX512VPOPCNTDQ-LABEL: ult_55_v2i64: 36871; AVX512VPOPCNTDQ: # %bb.0: 36872; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36873; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 36874; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] 36875; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36876; AVX512VPOPCNTDQ-NEXT: vzeroupper 36877; AVX512VPOPCNTDQ-NEXT: retq 36878; 36879; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64: 36880; AVX512VPOPCNTDQVL: # %bb.0: 36881; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 36882; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 36883; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36884; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36885; AVX512VPOPCNTDQVL-NEXT: retq 36886; 36887; BITALG_NOVLX-LABEL: ult_55_v2i64: 36888; BITALG_NOVLX: # %bb.0: 36889; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 36890; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 36891; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 36892; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36893; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] 36894; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 36895; BITALG_NOVLX-NEXT: vzeroupper 36896; BITALG_NOVLX-NEXT: retq 36897; 36898; BITALG-LABEL: ult_55_v2i64: 36899; BITALG: # %bb.0: 36900; BITALG-NEXT: vpopcntb %xmm0, %xmm0 36901; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 36902; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 36903; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 36904; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 36905; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 36906; BITALG-NEXT: retq 36907 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 36908 %3 = icmp ult <2 x i64> %2, <i64 55, i64 55> 36909 %4 = sext <2 x i1> %3 to <2 x i64> 36910 ret <2 x i64> %4 36911} 36912 36913define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { 36914; SSE2-LABEL: ugt_55_v2i64: 36915; SSE2: # %bb.0: 36916; SSE2-NEXT: movdqa %xmm0, %xmm1 36917; SSE2-NEXT: psrlw $1, %xmm1 36918; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36919; SSE2-NEXT: psubb %xmm1, %xmm0 36920; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36921; SSE2-NEXT: movdqa %xmm0, %xmm2 36922; SSE2-NEXT: pand %xmm1, %xmm2 36923; SSE2-NEXT: psrlw $2, %xmm0 36924; SSE2-NEXT: pand %xmm1, %xmm0 36925; SSE2-NEXT: paddb %xmm2, %xmm0 36926; SSE2-NEXT: movdqa %xmm0, %xmm1 36927; SSE2-NEXT: psrlw $4, %xmm1 36928; SSE2-NEXT: paddb %xmm0, %xmm1 36929; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 36930; SSE2-NEXT: pxor %xmm0, %xmm0 36931; SSE2-NEXT: psadbw %xmm1, %xmm0 36932; SSE2-NEXT: por {{.*}}(%rip), %xmm0 36933; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] 36934; SSE2-NEXT: movdqa %xmm0, %xmm2 36935; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 36936; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36937; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 36938; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36939; SSE2-NEXT: pand 
%xmm3, %xmm1 36940; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36941; SSE2-NEXT: por %xmm1, %xmm0 36942; SSE2-NEXT: retq 36943; 36944; SSE3-LABEL: ugt_55_v2i64: 36945; SSE3: # %bb.0: 36946; SSE3-NEXT: movdqa %xmm0, %xmm1 36947; SSE3-NEXT: psrlw $1, %xmm1 36948; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36949; SSE3-NEXT: psubb %xmm1, %xmm0 36950; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 36951; SSE3-NEXT: movdqa %xmm0, %xmm2 36952; SSE3-NEXT: pand %xmm1, %xmm2 36953; SSE3-NEXT: psrlw $2, %xmm0 36954; SSE3-NEXT: pand %xmm1, %xmm0 36955; SSE3-NEXT: paddb %xmm2, %xmm0 36956; SSE3-NEXT: movdqa %xmm0, %xmm1 36957; SSE3-NEXT: psrlw $4, %xmm1 36958; SSE3-NEXT: paddb %xmm0, %xmm1 36959; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 36960; SSE3-NEXT: pxor %xmm0, %xmm0 36961; SSE3-NEXT: psadbw %xmm1, %xmm0 36962; SSE3-NEXT: por {{.*}}(%rip), %xmm0 36963; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] 36964; SSE3-NEXT: movdqa %xmm0, %xmm2 36965; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 36966; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36967; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 36968; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36969; SSE3-NEXT: pand %xmm3, %xmm1 36970; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36971; SSE3-NEXT: por %xmm1, %xmm0 36972; SSE3-NEXT: retq 36973; 36974; SSSE3-LABEL: ugt_55_v2i64: 36975; SSSE3: # %bb.0: 36976; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 36977; SSSE3-NEXT: movdqa %xmm0, %xmm2 36978; SSSE3-NEXT: pand %xmm1, %xmm2 36979; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 36980; SSSE3-NEXT: movdqa %xmm3, %xmm4 36981; SSSE3-NEXT: pshufb %xmm2, %xmm4 36982; SSSE3-NEXT: psrlw $4, %xmm0 36983; SSSE3-NEXT: pand %xmm1, %xmm0 36984; SSSE3-NEXT: pshufb %xmm0, %xmm3 36985; SSSE3-NEXT: paddb %xmm4, %xmm3 36986; SSSE3-NEXT: pxor %xmm0, %xmm0 36987; SSSE3-NEXT: psadbw %xmm3, %xmm0 36988; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 36989; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483703,2147483703] 36990; SSSE3-NEXT: movdqa %xmm0, %xmm2 36991; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 36992; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 36993; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 36994; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 36995; SSSE3-NEXT: pand %xmm3, %xmm1 36996; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 36997; SSSE3-NEXT: por %xmm1, %xmm0 36998; SSSE3-NEXT: retq 36999; 37000; SSE41-LABEL: ugt_55_v2i64: 37001; SSE41: # %bb.0: 37002; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37003; SSE41-NEXT: movdqa %xmm0, %xmm2 37004; SSE41-NEXT: pand %xmm1, %xmm2 37005; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37006; SSE41-NEXT: movdqa %xmm3, %xmm4 37007; SSE41-NEXT: pshufb %xmm2, %xmm4 37008; SSE41-NEXT: psrlw $4, %xmm0 37009; SSE41-NEXT: pand %xmm1, %xmm0 37010; SSE41-NEXT: pshufb %xmm0, %xmm3 37011; SSE41-NEXT: paddb %xmm4, %xmm3 37012; SSE41-NEXT: pxor %xmm0, %xmm0 37013; SSE41-NEXT: psadbw %xmm3, %xmm0 37014; SSE41-NEXT: por {{.*}}(%rip), %xmm0 37015; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] 37016; SSE41-NEXT: movdqa %xmm0, %xmm2 37017; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 37018; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37019; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 37020; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37021; SSE41-NEXT: pand %xmm3, %xmm1 37022; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37023; SSE41-NEXT: por %xmm1, %xmm0 37024; SSE41-NEXT: retq 37025; 37026; AVX1-LABEL: ugt_55_v2i64: 37027; AVX1: # %bb.0: 37028; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37029; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 37030; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37031; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37032; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 37033; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 37034; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37035; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 37036; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 37037; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37038; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37039; AVX1-NEXT: retq 37040; 37041; AVX2-LABEL: ugt_55_v2i64: 37042; AVX2: # %bb.0: 37043; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37044; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 37045; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37046; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37047; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 37048; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 37049; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37050; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37051; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 37052; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37053; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37054; AVX2-NEXT: retq 37055; 37056; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64: 37057; AVX512VPOPCNTDQ: # %bb.0: 37058; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37059; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 37060; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37061; AVX512VPOPCNTDQ-NEXT: vzeroupper 37062; AVX512VPOPCNTDQ-NEXT: retq 37063; 37064; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64: 37065; AVX512VPOPCNTDQVL: # %bb.0: 37066; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 37067; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 37068; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37069; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37070; AVX512VPOPCNTDQVL-NEXT: retq 37071; 37072; BITALG_NOVLX-LABEL: ugt_55_v2i64: 37073; BITALG_NOVLX: # %bb.0: 37074; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37075; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 37076; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 37077; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37078; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37079; BITALG_NOVLX-NEXT: vzeroupper 37080; BITALG_NOVLX-NEXT: retq 
37081; 37082; BITALG-LABEL: ugt_55_v2i64: 37083; BITALG: # %bb.0: 37084; BITALG-NEXT: vpopcntb %xmm0, %xmm0 37085; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 37086; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37087; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 37088; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37089; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37090; BITALG-NEXT: retq 37091 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 37092 %3 = icmp ugt <2 x i64> %2, <i64 55, i64 55> 37093 %4 = sext <2 x i1> %3 to <2 x i64> 37094 ret <2 x i64> %4 37095} 37096 37097define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { 37098; SSE2-LABEL: ult_56_v2i64: 37099; SSE2: # %bb.0: 37100; SSE2-NEXT: movdqa %xmm0, %xmm1 37101; SSE2-NEXT: psrlw $1, %xmm1 37102; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37103; SSE2-NEXT: psubb %xmm1, %xmm0 37104; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37105; SSE2-NEXT: movdqa %xmm0, %xmm2 37106; SSE2-NEXT: pand %xmm1, %xmm2 37107; SSE2-NEXT: psrlw $2, %xmm0 37108; SSE2-NEXT: pand %xmm1, %xmm0 37109; SSE2-NEXT: paddb %xmm2, %xmm0 37110; SSE2-NEXT: movdqa %xmm0, %xmm1 37111; SSE2-NEXT: psrlw $4, %xmm1 37112; SSE2-NEXT: paddb %xmm0, %xmm1 37113; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37114; SSE2-NEXT: pxor %xmm0, %xmm0 37115; SSE2-NEXT: psadbw %xmm1, %xmm0 37116; SSE2-NEXT: por {{.*}}(%rip), %xmm0 37117; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37118; SSE2-NEXT: movdqa %xmm1, %xmm2 37119; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 37120; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37121; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 37122; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37123; SSE2-NEXT: pand %xmm3, %xmm1 37124; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37125; SSE2-NEXT: por %xmm1, %xmm0 37126; SSE2-NEXT: retq 37127; 37128; SSE3-LABEL: ult_56_v2i64: 37129; SSE3: # %bb.0: 37130; SSE3-NEXT: movdqa %xmm0, %xmm1 37131; SSE3-NEXT: psrlw $1, %xmm1 37132; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37133; 
SSE3-NEXT: psubb %xmm1, %xmm0 37134; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37135; SSE3-NEXT: movdqa %xmm0, %xmm2 37136; SSE3-NEXT: pand %xmm1, %xmm2 37137; SSE3-NEXT: psrlw $2, %xmm0 37138; SSE3-NEXT: pand %xmm1, %xmm0 37139; SSE3-NEXT: paddb %xmm2, %xmm0 37140; SSE3-NEXT: movdqa %xmm0, %xmm1 37141; SSE3-NEXT: psrlw $4, %xmm1 37142; SSE3-NEXT: paddb %xmm0, %xmm1 37143; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37144; SSE3-NEXT: pxor %xmm0, %xmm0 37145; SSE3-NEXT: psadbw %xmm1, %xmm0 37146; SSE3-NEXT: por {{.*}}(%rip), %xmm0 37147; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37148; SSE3-NEXT: movdqa %xmm1, %xmm2 37149; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 37150; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37151; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 37152; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37153; SSE3-NEXT: pand %xmm3, %xmm1 37154; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37155; SSE3-NEXT: por %xmm1, %xmm0 37156; SSE3-NEXT: retq 37157; 37158; SSSE3-LABEL: ult_56_v2i64: 37159; SSSE3: # %bb.0: 37160; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37161; SSSE3-NEXT: movdqa %xmm0, %xmm2 37162; SSSE3-NEXT: pand %xmm1, %xmm2 37163; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37164; SSSE3-NEXT: movdqa %xmm3, %xmm4 37165; SSSE3-NEXT: pshufb %xmm2, %xmm4 37166; SSSE3-NEXT: psrlw $4, %xmm0 37167; SSSE3-NEXT: pand %xmm1, %xmm0 37168; SSSE3-NEXT: pshufb %xmm0, %xmm3 37169; SSSE3-NEXT: paddb %xmm4, %xmm3 37170; SSSE3-NEXT: pxor %xmm0, %xmm0 37171; SSSE3-NEXT: psadbw %xmm3, %xmm0 37172; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 37173; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37174; SSSE3-NEXT: movdqa %xmm1, %xmm2 37175; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 37176; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37177; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 37178; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37179; SSSE3-NEXT: pand %xmm3, %xmm1 
37180; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37181; SSSE3-NEXT: por %xmm1, %xmm0 37182; SSSE3-NEXT: retq 37183; 37184; SSE41-LABEL: ult_56_v2i64: 37185; SSE41: # %bb.0: 37186; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37187; SSE41-NEXT: movdqa %xmm0, %xmm2 37188; SSE41-NEXT: pand %xmm1, %xmm2 37189; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37190; SSE41-NEXT: movdqa %xmm3, %xmm4 37191; SSE41-NEXT: pshufb %xmm2, %xmm4 37192; SSE41-NEXT: psrlw $4, %xmm0 37193; SSE41-NEXT: pand %xmm1, %xmm0 37194; SSE41-NEXT: pshufb %xmm0, %xmm3 37195; SSE41-NEXT: paddb %xmm4, %xmm3 37196; SSE41-NEXT: pxor %xmm0, %xmm0 37197; SSE41-NEXT: psadbw %xmm3, %xmm0 37198; SSE41-NEXT: por {{.*}}(%rip), %xmm0 37199; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37200; SSE41-NEXT: movdqa %xmm1, %xmm2 37201; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 37202; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37203; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 37204; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37205; SSE41-NEXT: pand %xmm3, %xmm1 37206; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37207; SSE41-NEXT: por %xmm1, %xmm0 37208; SSE41-NEXT: retq 37209; 37210; AVX1-LABEL: ult_56_v2i64: 37211; AVX1: # %bb.0: 37212; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37213; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 37214; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37215; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37216; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 37217; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 37218; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37219; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37220; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 37221; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37222; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] 37223; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37224; AVX1-NEXT: retq 37225; 37226; AVX2-LABEL: ult_56_v2i64: 37227; AVX2: # %bb.0: 37228; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37229; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 37230; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37231; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37232; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 37233; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 37234; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37235; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37236; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 37237; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37238; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] 37239; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37240; AVX2-NEXT: retq 37241; 37242; AVX512VPOPCNTDQ-LABEL: ult_56_v2i64: 37243; AVX512VPOPCNTDQ: # %bb.0: 37244; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37245; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 37246; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] 37247; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37248; AVX512VPOPCNTDQ-NEXT: vzeroupper 37249; AVX512VPOPCNTDQ-NEXT: retq 37250; 37251; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64: 37252; AVX512VPOPCNTDQVL: # %bb.0: 37253; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 37254; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 37255; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37256; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37257; AVX512VPOPCNTDQVL-NEXT: retq 37258; 37259; BITALG_NOVLX-LABEL: ult_56_v2i64: 37260; BITALG_NOVLX: # %bb.0: 37261; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37262; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 37263; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 37264; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37265; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] 37266; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37267; BITALG_NOVLX-NEXT: vzeroupper 37268; BITALG_NOVLX-NEXT: retq 37269; 37270; BITALG-LABEL: ult_56_v2i64: 37271; BITALG: # %bb.0: 37272; BITALG-NEXT: vpopcntb %xmm0, %xmm0 37273; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 37274; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37275; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 37276; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37277; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37278; BITALG-NEXT: retq 37279 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 37280 %3 = icmp ult <2 x i64> %2, <i64 56, i64 56> 37281 %4 = sext <2 x i1> %3 to <2 x i64> 37282 ret <2 x i64> %4 37283} 37284 37285define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { 37286; SSE2-LABEL: ugt_56_v2i64: 37287; SSE2: # %bb.0: 37288; SSE2-NEXT: movdqa %xmm0, %xmm1 37289; SSE2-NEXT: psrlw $1, %xmm1 37290; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37291; SSE2-NEXT: psubb %xmm1, %xmm0 37292; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37293; SSE2-NEXT: movdqa %xmm0, %xmm2 37294; SSE2-NEXT: pand %xmm1, %xmm2 37295; SSE2-NEXT: psrlw $2, %xmm0 37296; SSE2-NEXT: pand %xmm1, %xmm0 37297; SSE2-NEXT: paddb %xmm2, %xmm0 37298; SSE2-NEXT: movdqa %xmm0, %xmm1 37299; SSE2-NEXT: psrlw $4, %xmm1 37300; SSE2-NEXT: paddb %xmm0, %xmm1 37301; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37302; SSE2-NEXT: pxor %xmm0, %xmm0 37303; SSE2-NEXT: psadbw %xmm1, %xmm0 37304; SSE2-NEXT: por {{.*}}(%rip), %xmm0 37305; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37306; SSE2-NEXT: movdqa %xmm0, %xmm2 37307; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 37308; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37309; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 37310; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37311; SSE2-NEXT: pand %xmm3, %xmm1 37312; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37313; SSE2-NEXT: por %xmm1, %xmm0 37314; SSE2-NEXT: retq 37315; 37316; SSE3-LABEL: ugt_56_v2i64: 37317; SSE3: # %bb.0: 37318; SSE3-NEXT: movdqa %xmm0, %xmm1 37319; SSE3-NEXT: psrlw $1, %xmm1 37320; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37321; SSE3-NEXT: psubb %xmm1, %xmm0 37322; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37323; SSE3-NEXT: movdqa %xmm0, %xmm2 37324; SSE3-NEXT: pand %xmm1, %xmm2 37325; SSE3-NEXT: psrlw $2, %xmm0 37326; SSE3-NEXT: pand %xmm1, %xmm0 37327; SSE3-NEXT: paddb %xmm2, %xmm0 37328; SSE3-NEXT: movdqa %xmm0, %xmm1 37329; SSE3-NEXT: psrlw $4, %xmm1 37330; SSE3-NEXT: paddb %xmm0, %xmm1 37331; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37332; SSE3-NEXT: pxor %xmm0, %xmm0 37333; SSE3-NEXT: psadbw %xmm1, %xmm0 37334; SSE3-NEXT: por {{.*}}(%rip), %xmm0 37335; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37336; SSE3-NEXT: movdqa %xmm0, %xmm2 37337; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 37338; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37339; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 37340; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37341; SSE3-NEXT: pand %xmm3, %xmm1 37342; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37343; SSE3-NEXT: por %xmm1, %xmm0 37344; SSE3-NEXT: retq 37345; 37346; SSSE3-LABEL: ugt_56_v2i64: 37347; SSSE3: # %bb.0: 37348; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37349; SSSE3-NEXT: movdqa %xmm0, %xmm2 37350; SSSE3-NEXT: pand %xmm1, %xmm2 37351; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37352; SSSE3-NEXT: movdqa %xmm3, %xmm4 37353; SSSE3-NEXT: pshufb %xmm2, %xmm4 37354; SSSE3-NEXT: psrlw $4, %xmm0 37355; SSSE3-NEXT: pand %xmm1, %xmm0 37356; SSSE3-NEXT: pshufb %xmm0, %xmm3 37357; SSSE3-NEXT: paddb %xmm4, %xmm3 37358; SSSE3-NEXT: pxor %xmm0, %xmm0 37359; SSSE3-NEXT: psadbw %xmm3, %xmm0 37360; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 37361; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37362; SSSE3-NEXT: movdqa %xmm0, %xmm2 37363; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 37364; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37365; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 37366; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37367; SSSE3-NEXT: pand %xmm3, %xmm1 37368; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37369; 
SSSE3-NEXT: por %xmm1, %xmm0 37370; SSSE3-NEXT: retq 37371; 37372; SSE41-LABEL: ugt_56_v2i64: 37373; SSE41: # %bb.0: 37374; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37375; SSE41-NEXT: movdqa %xmm0, %xmm2 37376; SSE41-NEXT: pand %xmm1, %xmm2 37377; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37378; SSE41-NEXT: movdqa %xmm3, %xmm4 37379; SSE41-NEXT: pshufb %xmm2, %xmm4 37380; SSE41-NEXT: psrlw $4, %xmm0 37381; SSE41-NEXT: pand %xmm1, %xmm0 37382; SSE41-NEXT: pshufb %xmm0, %xmm3 37383; SSE41-NEXT: paddb %xmm4, %xmm3 37384; SSE41-NEXT: pxor %xmm0, %xmm0 37385; SSE41-NEXT: psadbw %xmm3, %xmm0 37386; SSE41-NEXT: por {{.*}}(%rip), %xmm0 37387; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] 37388; SSE41-NEXT: movdqa %xmm0, %xmm2 37389; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 37390; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37391; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 37392; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37393; SSE41-NEXT: pand %xmm3, %xmm1 37394; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37395; SSE41-NEXT: por %xmm1, %xmm0 37396; SSE41-NEXT: retq 37397; 37398; AVX1-LABEL: ugt_56_v2i64: 37399; AVX1: # %bb.0: 37400; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37401; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 37402; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37403; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37404; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 37405; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 37406; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37407; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37408; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 37409; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37410; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37411; AVX1-NEXT: retq 37412; 37413; AVX2-LABEL: ugt_56_v2i64: 37414; AVX2: # %bb.0: 37415; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37416; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 37417; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37418; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37419; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 37420; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 37421; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37422; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37423; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 37424; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37425; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37426; AVX2-NEXT: retq 37427; 37428; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64: 37429; AVX512VPOPCNTDQ: # %bb.0: 37430; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37431; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 37432; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37433; AVX512VPOPCNTDQ-NEXT: vzeroupper 37434; AVX512VPOPCNTDQ-NEXT: retq 37435; 37436; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64: 37437; AVX512VPOPCNTDQVL: # %bb.0: 37438; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 37439; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 37440; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37441; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37442; AVX512VPOPCNTDQVL-NEXT: retq 37443; 37444; BITALG_NOVLX-LABEL: ugt_56_v2i64: 37445; BITALG_NOVLX: # %bb.0: 37446; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37447; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 37448; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 37449; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37450; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37451; BITALG_NOVLX-NEXT: vzeroupper 37452; BITALG_NOVLX-NEXT: retq 37453; 37454; BITALG-LABEL: ugt_56_v2i64: 37455; BITALG: # %bb.0: 37456; BITALG-NEXT: vpopcntb %xmm0, %xmm0 37457; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 37458; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37459; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 37460; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37461; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37462; 
BITALG-NEXT: retq 37463 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 37464 %3 = icmp ugt <2 x i64> %2, <i64 56, i64 56> 37465 %4 = sext <2 x i1> %3 to <2 x i64> 37466 ret <2 x i64> %4 37467} 37468 37469define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { 37470; SSE2-LABEL: ult_57_v2i64: 37471; SSE2: # %bb.0: 37472; SSE2-NEXT: movdqa %xmm0, %xmm1 37473; SSE2-NEXT: psrlw $1, %xmm1 37474; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37475; SSE2-NEXT: psubb %xmm1, %xmm0 37476; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37477; SSE2-NEXT: movdqa %xmm0, %xmm2 37478; SSE2-NEXT: pand %xmm1, %xmm2 37479; SSE2-NEXT: psrlw $2, %xmm0 37480; SSE2-NEXT: pand %xmm1, %xmm0 37481; SSE2-NEXT: paddb %xmm2, %xmm0 37482; SSE2-NEXT: movdqa %xmm0, %xmm1 37483; SSE2-NEXT: psrlw $4, %xmm1 37484; SSE2-NEXT: paddb %xmm0, %xmm1 37485; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37486; SSE2-NEXT: pxor %xmm0, %xmm0 37487; SSE2-NEXT: psadbw %xmm1, %xmm0 37488; SSE2-NEXT: por {{.*}}(%rip), %xmm0 37489; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37490; SSE2-NEXT: movdqa %xmm1, %xmm2 37491; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 37492; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37493; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 37494; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37495; SSE2-NEXT: pand %xmm3, %xmm1 37496; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37497; SSE2-NEXT: por %xmm1, %xmm0 37498; SSE2-NEXT: retq 37499; 37500; SSE3-LABEL: ult_57_v2i64: 37501; SSE3: # %bb.0: 37502; SSE3-NEXT: movdqa %xmm0, %xmm1 37503; SSE3-NEXT: psrlw $1, %xmm1 37504; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37505; SSE3-NEXT: psubb %xmm1, %xmm0 37506; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37507; SSE3-NEXT: movdqa %xmm0, %xmm2 37508; SSE3-NEXT: pand %xmm1, %xmm2 37509; SSE3-NEXT: psrlw $2, %xmm0 37510; SSE3-NEXT: pand %xmm1, %xmm0 37511; SSE3-NEXT: paddb %xmm2, %xmm0 37512; SSE3-NEXT: movdqa %xmm0, %xmm1 37513; SSE3-NEXT: psrlw $4, 
%xmm1 37514; SSE3-NEXT: paddb %xmm0, %xmm1 37515; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37516; SSE3-NEXT: pxor %xmm0, %xmm0 37517; SSE3-NEXT: psadbw %xmm1, %xmm0 37518; SSE3-NEXT: por {{.*}}(%rip), %xmm0 37519; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37520; SSE3-NEXT: movdqa %xmm1, %xmm2 37521; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 37522; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37523; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 37524; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37525; SSE3-NEXT: pand %xmm3, %xmm1 37526; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37527; SSE3-NEXT: por %xmm1, %xmm0 37528; SSE3-NEXT: retq 37529; 37530; SSSE3-LABEL: ult_57_v2i64: 37531; SSSE3: # %bb.0: 37532; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37533; SSSE3-NEXT: movdqa %xmm0, %xmm2 37534; SSSE3-NEXT: pand %xmm1, %xmm2 37535; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37536; SSSE3-NEXT: movdqa %xmm3, %xmm4 37537; SSSE3-NEXT: pshufb %xmm2, %xmm4 37538; SSSE3-NEXT: psrlw $4, %xmm0 37539; SSSE3-NEXT: pand %xmm1, %xmm0 37540; SSSE3-NEXT: pshufb %xmm0, %xmm3 37541; SSSE3-NEXT: paddb %xmm4, %xmm3 37542; SSSE3-NEXT: pxor %xmm0, %xmm0 37543; SSSE3-NEXT: psadbw %xmm3, %xmm0 37544; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 37545; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37546; SSSE3-NEXT: movdqa %xmm1, %xmm2 37547; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 37548; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37549; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 37550; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37551; SSSE3-NEXT: pand %xmm3, %xmm1 37552; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37553; SSSE3-NEXT: por %xmm1, %xmm0 37554; SSSE3-NEXT: retq 37555; 37556; SSE41-LABEL: ult_57_v2i64: 37557; SSE41: # %bb.0: 37558; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37559; SSE41-NEXT: movdqa %xmm0, %xmm2 37560; SSE41-NEXT: pand %xmm1, %xmm2 37561; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37562; SSE41-NEXT: movdqa %xmm3, %xmm4 37563; SSE41-NEXT: pshufb %xmm2, %xmm4 37564; SSE41-NEXT: psrlw $4, %xmm0 37565; SSE41-NEXT: pand %xmm1, %xmm0 37566; SSE41-NEXT: pshufb %xmm0, %xmm3 37567; SSE41-NEXT: paddb %xmm4, %xmm3 37568; SSE41-NEXT: pxor %xmm0, %xmm0 37569; SSE41-NEXT: psadbw %xmm3, %xmm0 37570; SSE41-NEXT: por {{.*}}(%rip), %xmm0 37571; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37572; SSE41-NEXT: movdqa %xmm1, %xmm2 37573; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 37574; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37575; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 37576; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37577; SSE41-NEXT: pand %xmm3, %xmm1 37578; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37579; SSE41-NEXT: por %xmm1, %xmm0 37580; SSE41-NEXT: retq 37581; 37582; AVX1-LABEL: ult_57_v2i64: 37583; AVX1: # %bb.0: 37584; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37585; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 37586; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37587; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37588; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 37589; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 37590; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37591; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37592; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 37593; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37594; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] 37595; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37596; AVX1-NEXT: retq 37597; 37598; AVX2-LABEL: ult_57_v2i64: 37599; AVX2: # %bb.0: 37600; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37601; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 37602; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37603; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37604; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 37605; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 37606; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 37607; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37608; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 37609; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37610; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] 37611; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37612; AVX2-NEXT: retq 37613; 37614; AVX512VPOPCNTDQ-LABEL: ult_57_v2i64: 37615; AVX512VPOPCNTDQ: # %bb.0: 37616; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37617; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 37618; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] 37619; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37620; AVX512VPOPCNTDQ-NEXT: vzeroupper 37621; AVX512VPOPCNTDQ-NEXT: retq 37622; 37623; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64: 37624; AVX512VPOPCNTDQVL: # %bb.0: 37625; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 37626; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 37627; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37628; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37629; AVX512VPOPCNTDQVL-NEXT: retq 37630; 37631; BITALG_NOVLX-LABEL: ult_57_v2i64: 37632; BITALG_NOVLX: # %bb.0: 37633; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37634; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 37635; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 37636; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37637; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] 37638; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37639; BITALG_NOVLX-NEXT: vzeroupper 37640; BITALG_NOVLX-NEXT: retq 37641; 37642; BITALG-LABEL: ult_57_v2i64: 37643; BITALG: # %bb.0: 37644; BITALG-NEXT: vpopcntb %xmm0, %xmm0 37645; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 37646; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37647; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 37648; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37649; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37650; BITALG-NEXT: retq 37651 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 37652 %3 = icmp ult <2 x i64> %2, <i64 
57, i64 57> 37653 %4 = sext <2 x i1> %3 to <2 x i64> 37654 ret <2 x i64> %4 37655} 37656 37657define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { 37658; SSE2-LABEL: ugt_57_v2i64: 37659; SSE2: # %bb.0: 37660; SSE2-NEXT: movdqa %xmm0, %xmm1 37661; SSE2-NEXT: psrlw $1, %xmm1 37662; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37663; SSE2-NEXT: psubb %xmm1, %xmm0 37664; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37665; SSE2-NEXT: movdqa %xmm0, %xmm2 37666; SSE2-NEXT: pand %xmm1, %xmm2 37667; SSE2-NEXT: psrlw $2, %xmm0 37668; SSE2-NEXT: pand %xmm1, %xmm0 37669; SSE2-NEXT: paddb %xmm2, %xmm0 37670; SSE2-NEXT: movdqa %xmm0, %xmm1 37671; SSE2-NEXT: psrlw $4, %xmm1 37672; SSE2-NEXT: paddb %xmm0, %xmm1 37673; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37674; SSE2-NEXT: pxor %xmm0, %xmm0 37675; SSE2-NEXT: psadbw %xmm1, %xmm0 37676; SSE2-NEXT: por {{.*}}(%rip), %xmm0 37677; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37678; SSE2-NEXT: movdqa %xmm0, %xmm2 37679; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 37680; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37681; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 37682; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37683; SSE2-NEXT: pand %xmm3, %xmm1 37684; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37685; SSE2-NEXT: por %xmm1, %xmm0 37686; SSE2-NEXT: retq 37687; 37688; SSE3-LABEL: ugt_57_v2i64: 37689; SSE3: # %bb.0: 37690; SSE3-NEXT: movdqa %xmm0, %xmm1 37691; SSE3-NEXT: psrlw $1, %xmm1 37692; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37693; SSE3-NEXT: psubb %xmm1, %xmm0 37694; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37695; SSE3-NEXT: movdqa %xmm0, %xmm2 37696; SSE3-NEXT: pand %xmm1, %xmm2 37697; SSE3-NEXT: psrlw $2, %xmm0 37698; SSE3-NEXT: pand %xmm1, %xmm0 37699; SSE3-NEXT: paddb %xmm2, %xmm0 37700; SSE3-NEXT: movdqa %xmm0, %xmm1 37701; SSE3-NEXT: psrlw $4, %xmm1 37702; SSE3-NEXT: paddb %xmm0, %xmm1 37703; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37704; SSE3-NEXT: pxor %xmm0, 
%xmm0 37705; SSE3-NEXT: psadbw %xmm1, %xmm0 37706; SSE3-NEXT: por {{.*}}(%rip), %xmm0 37707; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37708; SSE3-NEXT: movdqa %xmm0, %xmm2 37709; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 37710; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37711; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 37712; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37713; SSE3-NEXT: pand %xmm3, %xmm1 37714; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37715; SSE3-NEXT: por %xmm1, %xmm0 37716; SSE3-NEXT: retq 37717; 37718; SSSE3-LABEL: ugt_57_v2i64: 37719; SSSE3: # %bb.0: 37720; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37721; SSSE3-NEXT: movdqa %xmm0, %xmm2 37722; SSSE3-NEXT: pand %xmm1, %xmm2 37723; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37724; SSSE3-NEXT: movdqa %xmm3, %xmm4 37725; SSSE3-NEXT: pshufb %xmm2, %xmm4 37726; SSSE3-NEXT: psrlw $4, %xmm0 37727; SSSE3-NEXT: pand %xmm1, %xmm0 37728; SSSE3-NEXT: pshufb %xmm0, %xmm3 37729; SSSE3-NEXT: paddb %xmm4, %xmm3 37730; SSSE3-NEXT: pxor %xmm0, %xmm0 37731; SSSE3-NEXT: psadbw %xmm3, %xmm0 37732; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 37733; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37734; SSSE3-NEXT: movdqa %xmm0, %xmm2 37735; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 37736; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37737; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 37738; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37739; SSSE3-NEXT: pand %xmm3, %xmm1 37740; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37741; SSSE3-NEXT: por %xmm1, %xmm0 37742; SSSE3-NEXT: retq 37743; 37744; SSE41-LABEL: ugt_57_v2i64: 37745; SSE41: # %bb.0: 37746; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37747; SSE41-NEXT: movdqa %xmm0, %xmm2 37748; SSE41-NEXT: pand %xmm1, %xmm2 37749; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37750; SSE41-NEXT: movdqa %xmm3, %xmm4 37751; SSE41-NEXT: 
pshufb %xmm2, %xmm4 37752; SSE41-NEXT: psrlw $4, %xmm0 37753; SSE41-NEXT: pand %xmm1, %xmm0 37754; SSE41-NEXT: pshufb %xmm0, %xmm3 37755; SSE41-NEXT: paddb %xmm4, %xmm3 37756; SSE41-NEXT: pxor %xmm0, %xmm0 37757; SSE41-NEXT: psadbw %xmm3, %xmm0 37758; SSE41-NEXT: por {{.*}}(%rip), %xmm0 37759; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] 37760; SSE41-NEXT: movdqa %xmm0, %xmm2 37761; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 37762; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37763; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 37764; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37765; SSE41-NEXT: pand %xmm3, %xmm1 37766; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37767; SSE41-NEXT: por %xmm1, %xmm0 37768; SSE41-NEXT: retq 37769; 37770; AVX1-LABEL: ugt_57_v2i64: 37771; AVX1: # %bb.0: 37772; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37773; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 37774; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37775; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37776; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 37777; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 37778; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37779; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37780; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 37781; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37782; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37783; AVX1-NEXT: retq 37784; 37785; AVX2-LABEL: ugt_57_v2i64: 37786; AVX2: # %bb.0: 37787; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37788; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 37789; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37790; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37791; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 37792; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 37793; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37794; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37795; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 37796; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37797; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37798; AVX2-NEXT: retq 37799; 37800; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64: 37801; AVX512VPOPCNTDQ: # %bb.0: 37802; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37803; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 37804; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37805; AVX512VPOPCNTDQ-NEXT: vzeroupper 37806; AVX512VPOPCNTDQ-NEXT: retq 37807; 37808; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64: 37809; AVX512VPOPCNTDQVL: # %bb.0: 37810; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 37811; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 37812; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37813; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37814; AVX512VPOPCNTDQVL-NEXT: retq 37815; 37816; BITALG_NOVLX-LABEL: ugt_57_v2i64: 37817; BITALG_NOVLX: # %bb.0: 37818; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 37819; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 37820; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 37821; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37822; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 37823; BITALG_NOVLX-NEXT: vzeroupper 37824; BITALG_NOVLX-NEXT: retq 37825; 37826; BITALG-LABEL: ugt_57_v2i64: 37827; BITALG: # %bb.0: 37828; BITALG-NEXT: vpopcntb %xmm0, %xmm0 37829; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 37830; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37831; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 37832; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 37833; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 37834; BITALG-NEXT: retq 37835 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 37836 %3 = icmp ugt <2 x i64> %2, <i64 57, i64 57> 37837 %4 = sext <2 x i1> %3 to <2 x i64> 37838 ret <2 x i64> %4 37839} 37840 37841define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { 37842; SSE2-LABEL: ult_58_v2i64: 37843; SSE2: # %bb.0: 37844; SSE2-NEXT: movdqa %xmm0, %xmm1 37845; SSE2-NEXT: psrlw $1, %xmm1 37846; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 37847; SSE2-NEXT: psubb %xmm1, %xmm0 37848; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37849; SSE2-NEXT: movdqa %xmm0, %xmm2 37850; SSE2-NEXT: pand %xmm1, %xmm2 37851; SSE2-NEXT: psrlw $2, %xmm0 37852; SSE2-NEXT: pand %xmm1, %xmm0 37853; SSE2-NEXT: paddb %xmm2, %xmm0 37854; SSE2-NEXT: movdqa %xmm0, %xmm1 37855; SSE2-NEXT: psrlw $4, %xmm1 37856; SSE2-NEXT: paddb %xmm0, %xmm1 37857; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 37858; SSE2-NEXT: pxor %xmm0, %xmm0 37859; SSE2-NEXT: psadbw %xmm1, %xmm0 37860; SSE2-NEXT: por {{.*}}(%rip), %xmm0 37861; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 37862; SSE2-NEXT: movdqa %xmm1, %xmm2 37863; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 37864; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37865; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 37866; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37867; SSE2-NEXT: pand %xmm3, %xmm1 37868; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37869; SSE2-NEXT: por %xmm1, %xmm0 37870; SSE2-NEXT: retq 37871; 37872; SSE3-LABEL: ult_58_v2i64: 37873; SSE3: # %bb.0: 37874; SSE3-NEXT: movdqa %xmm0, %xmm1 37875; SSE3-NEXT: psrlw $1, %xmm1 37876; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37877; SSE3-NEXT: psubb %xmm1, %xmm0 37878; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 37879; SSE3-NEXT: movdqa %xmm0, %xmm2 37880; SSE3-NEXT: pand %xmm1, %xmm2 37881; SSE3-NEXT: psrlw $2, %xmm0 37882; SSE3-NEXT: pand %xmm1, %xmm0 37883; SSE3-NEXT: paddb %xmm2, %xmm0 37884; SSE3-NEXT: movdqa %xmm0, %xmm1 37885; SSE3-NEXT: psrlw $4, %xmm1 37886; SSE3-NEXT: paddb %xmm0, %xmm1 37887; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 37888; SSE3-NEXT: pxor %xmm0, %xmm0 37889; SSE3-NEXT: psadbw %xmm1, %xmm0 37890; SSE3-NEXT: por {{.*}}(%rip), %xmm0 37891; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 37892; SSE3-NEXT: movdqa %xmm1, %xmm2 37893; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 37894; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37895; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 37896; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37897; SSE3-NEXT: pand %xmm3, %xmm1 37898; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37899; SSE3-NEXT: por %xmm1, %xmm0 37900; SSE3-NEXT: retq 37901; 37902; SSSE3-LABEL: ult_58_v2i64: 37903; SSSE3: # %bb.0: 37904; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37905; SSSE3-NEXT: movdqa %xmm0, %xmm2 37906; SSSE3-NEXT: pand %xmm1, %xmm2 37907; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37908; SSSE3-NEXT: movdqa %xmm3, %xmm4 37909; SSSE3-NEXT: pshufb %xmm2, %xmm4 37910; SSSE3-NEXT: psrlw $4, %xmm0 37911; SSSE3-NEXT: pand %xmm1, %xmm0 37912; SSSE3-NEXT: pshufb %xmm0, %xmm3 37913; SSSE3-NEXT: paddb %xmm4, %xmm3 37914; SSSE3-NEXT: pxor %xmm0, %xmm0 37915; SSSE3-NEXT: psadbw %xmm3, %xmm0 37916; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 37917; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 37918; SSSE3-NEXT: movdqa %xmm1, %xmm2 37919; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 37920; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37921; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 37922; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37923; SSSE3-NEXT: pand %xmm3, %xmm1 37924; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37925; SSSE3-NEXT: por %xmm1, %xmm0 37926; SSSE3-NEXT: retq 37927; 37928; SSE41-LABEL: ult_58_v2i64: 37929; SSE41: # %bb.0: 37930; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37931; SSE41-NEXT: movdqa %xmm0, %xmm2 37932; SSE41-NEXT: pand %xmm1, %xmm2 37933; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37934; SSE41-NEXT: movdqa %xmm3, %xmm4 37935; SSE41-NEXT: pshufb %xmm2, %xmm4 37936; SSE41-NEXT: psrlw $4, %xmm0 37937; SSE41-NEXT: pand %xmm1, %xmm0 37938; SSE41-NEXT: pshufb %xmm0, %xmm3 37939; SSE41-NEXT: paddb %xmm4, %xmm3 37940; SSE41-NEXT: pxor %xmm0, %xmm0 37941; SSE41-NEXT: psadbw %xmm3, %xmm0 37942; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
37943; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 37944; SSE41-NEXT: movdqa %xmm1, %xmm2 37945; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 37946; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 37947; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 37948; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 37949; SSE41-NEXT: pand %xmm3, %xmm1 37950; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 37951; SSE41-NEXT: por %xmm1, %xmm0 37952; SSE41-NEXT: retq 37953; 37954; AVX1-LABEL: ult_58_v2i64: 37955; AVX1: # %bb.0: 37956; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37957; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 37958; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37959; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37960; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 37961; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 37962; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37963; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37964; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 37965; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37966; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] 37967; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37968; AVX1-NEXT: retq 37969; 37970; AVX2-LABEL: ult_58_v2i64: 37971; AVX2: # %bb.0: 37972; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 37973; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 37974; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 37975; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 37976; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 37977; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 37978; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 37979; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 37980; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 37981; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 37982; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] 37983; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37984; AVX2-NEXT: retq 37985; 37986; AVX512VPOPCNTDQ-LABEL: ult_58_v2i64: 37987; AVX512VPOPCNTDQ: # %bb.0: 37988; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 37989; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 37990; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] 37991; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 37992; AVX512VPOPCNTDQ-NEXT: vzeroupper 37993; AVX512VPOPCNTDQ-NEXT: retq 37994; 37995; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64: 37996; AVX512VPOPCNTDQVL: # %bb.0: 37997; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 37998; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 37999; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38000; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38001; AVX512VPOPCNTDQVL-NEXT: retq 38002; 38003; BITALG_NOVLX-LABEL: ult_58_v2i64: 38004; BITALG_NOVLX: # %bb.0: 38005; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38006; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 38007; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 38008; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38009; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] 38010; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38011; BITALG_NOVLX-NEXT: vzeroupper 38012; BITALG_NOVLX-NEXT: retq 38013; 38014; BITALG-LABEL: ult_58_v2i64: 38015; BITALG: # %bb.0: 38016; BITALG-NEXT: vpopcntb %xmm0, %xmm0 38017; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 38018; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38019; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 38020; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38021; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38022; BITALG-NEXT: retq 38023 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 38024 %3 = icmp ult <2 x i64> %2, <i64 58, i64 58> 38025 %4 = sext <2 x i1> %3 to <2 x i64> 38026 ret <2 x i64> %4 38027} 38028 38029define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { 38030; SSE2-LABEL: ugt_58_v2i64: 38031; SSE2: # %bb.0: 38032; SSE2-NEXT: movdqa %xmm0, %xmm1 38033; SSE2-NEXT: psrlw $1, %xmm1 38034; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38035; SSE2-NEXT: psubb %xmm1, %xmm0 38036; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38037; SSE2-NEXT: movdqa %xmm0, %xmm2 38038; SSE2-NEXT: pand %xmm1, %xmm2 38039; SSE2-NEXT: psrlw $2, %xmm0 38040; SSE2-NEXT: pand %xmm1, %xmm0 38041; SSE2-NEXT: paddb %xmm2, %xmm0 38042; SSE2-NEXT: movdqa %xmm0, %xmm1 38043; SSE2-NEXT: psrlw $4, %xmm1 38044; SSE2-NEXT: paddb %xmm0, %xmm1 38045; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38046; SSE2-NEXT: pxor %xmm0, %xmm0 38047; SSE2-NEXT: psadbw %xmm1, %xmm0 38048; SSE2-NEXT: por {{.*}}(%rip), %xmm0 38049; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 38050; SSE2-NEXT: movdqa %xmm0, %xmm2 38051; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 38052; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38053; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 38054; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38055; SSE2-NEXT: pand %xmm3, %xmm1 38056; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38057; SSE2-NEXT: por %xmm1, %xmm0 38058; SSE2-NEXT: retq 38059; 38060; SSE3-LABEL: ugt_58_v2i64: 38061; SSE3: # %bb.0: 38062; SSE3-NEXT: movdqa %xmm0, %xmm1 38063; SSE3-NEXT: psrlw $1, %xmm1 38064; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38065; SSE3-NEXT: psubb %xmm1, %xmm0 38066; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38067; SSE3-NEXT: movdqa %xmm0, %xmm2 38068; SSE3-NEXT: pand %xmm1, %xmm2 38069; SSE3-NEXT: psrlw $2, %xmm0 38070; SSE3-NEXT: pand %xmm1, %xmm0 38071; SSE3-NEXT: paddb %xmm2, %xmm0 38072; SSE3-NEXT: movdqa %xmm0, %xmm1 38073; SSE3-NEXT: psrlw $4, %xmm1 38074; SSE3-NEXT: paddb %xmm0, %xmm1 38075; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38076; SSE3-NEXT: pxor %xmm0, %xmm0 38077; SSE3-NEXT: psadbw %xmm1, %xmm0 38078; SSE3-NEXT: por {{.*}}(%rip), %xmm0 38079; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 38080; SSE3-NEXT: movdqa %xmm0, %xmm2 38081; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 38082; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38083; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 38084; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38085; 
SSE3-NEXT: pand %xmm3, %xmm1 38086; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38087; SSE3-NEXT: por %xmm1, %xmm0 38088; SSE3-NEXT: retq 38089; 38090; SSSE3-LABEL: ugt_58_v2i64: 38091; SSSE3: # %bb.0: 38092; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38093; SSSE3-NEXT: movdqa %xmm0, %xmm2 38094; SSSE3-NEXT: pand %xmm1, %xmm2 38095; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38096; SSSE3-NEXT: movdqa %xmm3, %xmm4 38097; SSSE3-NEXT: pshufb %xmm2, %xmm4 38098; SSSE3-NEXT: psrlw $4, %xmm0 38099; SSSE3-NEXT: pand %xmm1, %xmm0 38100; SSSE3-NEXT: pshufb %xmm0, %xmm3 38101; SSSE3-NEXT: paddb %xmm4, %xmm3 38102; SSSE3-NEXT: pxor %xmm0, %xmm0 38103; SSSE3-NEXT: psadbw %xmm3, %xmm0 38104; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 38105; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 38106; SSSE3-NEXT: movdqa %xmm0, %xmm2 38107; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 38108; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38109; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 38110; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38111; SSSE3-NEXT: pand %xmm3, %xmm1 38112; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38113; SSSE3-NEXT: por %xmm1, %xmm0 38114; SSSE3-NEXT: retq 38115; 38116; SSE41-LABEL: ugt_58_v2i64: 38117; SSE41: # %bb.0: 38118; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38119; SSE41-NEXT: movdqa %xmm0, %xmm2 38120; SSE41-NEXT: pand %xmm1, %xmm2 38121; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38122; SSE41-NEXT: movdqa %xmm3, %xmm4 38123; SSE41-NEXT: pshufb %xmm2, %xmm4 38124; SSE41-NEXT: psrlw $4, %xmm0 38125; SSE41-NEXT: pand %xmm1, %xmm0 38126; SSE41-NEXT: pshufb %xmm0, %xmm3 38127; SSE41-NEXT: paddb %xmm4, %xmm3 38128; SSE41-NEXT: pxor %xmm0, %xmm0 38129; SSE41-NEXT: psadbw %xmm3, %xmm0 38130; SSE41-NEXT: por {{.*}}(%rip), %xmm0 38131; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] 38132; SSE41-NEXT: movdqa %xmm0, 
%xmm2 38133; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 38134; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38135; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 38136; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38137; SSE41-NEXT: pand %xmm3, %xmm1 38138; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38139; SSE41-NEXT: por %xmm1, %xmm0 38140; SSE41-NEXT: retq 38141; 38142; AVX1-LABEL: ugt_58_v2i64: 38143; AVX1: # %bb.0: 38144; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38145; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 38146; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38147; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38148; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 38149; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 38150; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38151; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38152; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 38153; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38154; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38155; AVX1-NEXT: retq 38156; 38157; AVX2-LABEL: ugt_58_v2i64: 38158; AVX2: # %bb.0: 38159; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38160; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 38161; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38162; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38163; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 38164; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 38165; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38166; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38167; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 38168; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38169; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38170; AVX2-NEXT: retq 38171; 38172; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64: 38173; AVX512VPOPCNTDQ: # %bb.0: 38174; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38175; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 38176; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38177; AVX512VPOPCNTDQ-NEXT: vzeroupper 38178; 
AVX512VPOPCNTDQ-NEXT: retq 38179; 38180; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64: 38181; AVX512VPOPCNTDQVL: # %bb.0: 38182; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 38183; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 38184; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38185; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38186; AVX512VPOPCNTDQVL-NEXT: retq 38187; 38188; BITALG_NOVLX-LABEL: ugt_58_v2i64: 38189; BITALG_NOVLX: # %bb.0: 38190; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38191; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 38192; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 38193; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38194; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38195; BITALG_NOVLX-NEXT: vzeroupper 38196; BITALG_NOVLX-NEXT: retq 38197; 38198; BITALG-LABEL: ugt_58_v2i64: 38199; BITALG: # %bb.0: 38200; BITALG-NEXT: vpopcntb %xmm0, %xmm0 38201; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 38202; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38203; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 38204; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38205; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38206; BITALG-NEXT: retq 38207 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 38208 %3 = icmp ugt <2 x i64> %2, <i64 58, i64 58> 38209 %4 = sext <2 x i1> %3 to <2 x i64> 38210 ret <2 x i64> %4 38211} 38212 38213define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { 38214; SSE2-LABEL: ult_59_v2i64: 38215; SSE2: # %bb.0: 38216; SSE2-NEXT: movdqa %xmm0, %xmm1 38217; SSE2-NEXT: psrlw $1, %xmm1 38218; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38219; SSE2-NEXT: psubb %xmm1, %xmm0 38220; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38221; SSE2-NEXT: movdqa %xmm0, %xmm2 38222; SSE2-NEXT: pand %xmm1, %xmm2 38223; SSE2-NEXT: psrlw $2, %xmm0 38224; SSE2-NEXT: pand %xmm1, %xmm0 38225; SSE2-NEXT: paddb %xmm2, %xmm0 38226; SSE2-NEXT: movdqa %xmm0, %xmm1 38227; SSE2-NEXT: psrlw $4, 
%xmm1 38228; SSE2-NEXT: paddb %xmm0, %xmm1 38229; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38230; SSE2-NEXT: pxor %xmm0, %xmm0 38231; SSE2-NEXT: psadbw %xmm1, %xmm0 38232; SSE2-NEXT: por {{.*}}(%rip), %xmm0 38233; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38234; SSE2-NEXT: movdqa %xmm1, %xmm2 38235; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 38236; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38237; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 38238; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38239; SSE2-NEXT: pand %xmm3, %xmm1 38240; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38241; SSE2-NEXT: por %xmm1, %xmm0 38242; SSE2-NEXT: retq 38243; 38244; SSE3-LABEL: ult_59_v2i64: 38245; SSE3: # %bb.0: 38246; SSE3-NEXT: movdqa %xmm0, %xmm1 38247; SSE3-NEXT: psrlw $1, %xmm1 38248; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38249; SSE3-NEXT: psubb %xmm1, %xmm0 38250; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38251; SSE3-NEXT: movdqa %xmm0, %xmm2 38252; SSE3-NEXT: pand %xmm1, %xmm2 38253; SSE3-NEXT: psrlw $2, %xmm0 38254; SSE3-NEXT: pand %xmm1, %xmm0 38255; SSE3-NEXT: paddb %xmm2, %xmm0 38256; SSE3-NEXT: movdqa %xmm0, %xmm1 38257; SSE3-NEXT: psrlw $4, %xmm1 38258; SSE3-NEXT: paddb %xmm0, %xmm1 38259; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38260; SSE3-NEXT: pxor %xmm0, %xmm0 38261; SSE3-NEXT: psadbw %xmm1, %xmm0 38262; SSE3-NEXT: por {{.*}}(%rip), %xmm0 38263; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38264; SSE3-NEXT: movdqa %xmm1, %xmm2 38265; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 38266; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38267; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 38268; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38269; SSE3-NEXT: pand %xmm3, %xmm1 38270; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38271; SSE3-NEXT: por %xmm1, %xmm0 38272; SSE3-NEXT: retq 38273; 38274; SSSE3-LABEL: ult_59_v2i64: 38275; SSSE3: # %bb.0: 38276; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38277; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 38278; SSSE3-NEXT: pand %xmm1, %xmm2 38279; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38280; SSSE3-NEXT: movdqa %xmm3, %xmm4 38281; SSSE3-NEXT: pshufb %xmm2, %xmm4 38282; SSSE3-NEXT: psrlw $4, %xmm0 38283; SSSE3-NEXT: pand %xmm1, %xmm0 38284; SSSE3-NEXT: pshufb %xmm0, %xmm3 38285; SSSE3-NEXT: paddb %xmm4, %xmm3 38286; SSSE3-NEXT: pxor %xmm0, %xmm0 38287; SSSE3-NEXT: psadbw %xmm3, %xmm0 38288; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 38289; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38290; SSSE3-NEXT: movdqa %xmm1, %xmm2 38291; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 38292; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38293; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 38294; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38295; SSSE3-NEXT: pand %xmm3, %xmm1 38296; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38297; SSSE3-NEXT: por %xmm1, %xmm0 38298; SSSE3-NEXT: retq 38299; 38300; SSE41-LABEL: ult_59_v2i64: 38301; SSE41: # %bb.0: 38302; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38303; SSE41-NEXT: movdqa %xmm0, %xmm2 38304; SSE41-NEXT: pand %xmm1, %xmm2 38305; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38306; SSE41-NEXT: movdqa %xmm3, %xmm4 38307; SSE41-NEXT: pshufb %xmm2, %xmm4 38308; SSE41-NEXT: psrlw $4, %xmm0 38309; SSE41-NEXT: pand %xmm1, %xmm0 38310; SSE41-NEXT: pshufb %xmm0, %xmm3 38311; SSE41-NEXT: paddb %xmm4, %xmm3 38312; SSE41-NEXT: pxor %xmm0, %xmm0 38313; SSE41-NEXT: psadbw %xmm3, %xmm0 38314; SSE41-NEXT: por {{.*}}(%rip), %xmm0 38315; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38316; SSE41-NEXT: movdqa %xmm1, %xmm2 38317; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 38318; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38319; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 38320; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38321; SSE41-NEXT: pand %xmm3, %xmm1 38322; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38323; 
SSE41-NEXT: por %xmm1, %xmm0 38324; SSE41-NEXT: retq 38325; 38326; AVX1-LABEL: ult_59_v2i64: 38327; AVX1: # %bb.0: 38328; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38329; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 38330; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38331; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38332; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 38333; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 38334; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38335; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38336; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 38337; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38338; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] 38339; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38340; AVX1-NEXT: retq 38341; 38342; AVX2-LABEL: ult_59_v2i64: 38343; AVX2: # %bb.0: 38344; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38345; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 38346; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38347; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38348; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 38349; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 38350; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38351; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38352; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 38353; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38354; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] 38355; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38356; AVX2-NEXT: retq 38357; 38358; AVX512VPOPCNTDQ-LABEL: ult_59_v2i64: 38359; AVX512VPOPCNTDQ: # %bb.0: 38360; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38361; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 38362; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] 38363; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38364; AVX512VPOPCNTDQ-NEXT: vzeroupper 38365; AVX512VPOPCNTDQ-NEXT: retq 38366; 38367; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64: 38368; AVX512VPOPCNTDQVL: # %bb.0: 38369; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, 
%xmm0 38370; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 38371; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38372; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38373; AVX512VPOPCNTDQVL-NEXT: retq 38374; 38375; BITALG_NOVLX-LABEL: ult_59_v2i64: 38376; BITALG_NOVLX: # %bb.0: 38377; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38378; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 38379; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 38380; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38381; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] 38382; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38383; BITALG_NOVLX-NEXT: vzeroupper 38384; BITALG_NOVLX-NEXT: retq 38385; 38386; BITALG-LABEL: ult_59_v2i64: 38387; BITALG: # %bb.0: 38388; BITALG-NEXT: vpopcntb %xmm0, %xmm0 38389; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 38390; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38391; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 38392; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38393; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38394; BITALG-NEXT: retq 38395 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 38396 %3 = icmp ult <2 x i64> %2, <i64 59, i64 59> 38397 %4 = sext <2 x i1> %3 to <2 x i64> 38398 ret <2 x i64> %4 38399} 38400 38401define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { 38402; SSE2-LABEL: ugt_59_v2i64: 38403; SSE2: # %bb.0: 38404; SSE2-NEXT: movdqa %xmm0, %xmm1 38405; SSE2-NEXT: psrlw $1, %xmm1 38406; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38407; SSE2-NEXT: psubb %xmm1, %xmm0 38408; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38409; SSE2-NEXT: movdqa %xmm0, %xmm2 38410; SSE2-NEXT: pand %xmm1, %xmm2 38411; SSE2-NEXT: psrlw $2, %xmm0 38412; SSE2-NEXT: pand %xmm1, %xmm0 38413; SSE2-NEXT: paddb %xmm2, %xmm0 38414; SSE2-NEXT: movdqa %xmm0, %xmm1 38415; SSE2-NEXT: psrlw $4, %xmm1 38416; SSE2-NEXT: paddb %xmm0, %xmm1 38417; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38418; SSE2-NEXT: pxor %xmm0, 
%xmm0 38419; SSE2-NEXT: psadbw %xmm1, %xmm0 38420; SSE2-NEXT: por {{.*}}(%rip), %xmm0 38421; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38422; SSE2-NEXT: movdqa %xmm0, %xmm2 38423; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 38424; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38425; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 38426; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38427; SSE2-NEXT: pand %xmm3, %xmm1 38428; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38429; SSE2-NEXT: por %xmm1, %xmm0 38430; SSE2-NEXT: retq 38431; 38432; SSE3-LABEL: ugt_59_v2i64: 38433; SSE3: # %bb.0: 38434; SSE3-NEXT: movdqa %xmm0, %xmm1 38435; SSE3-NEXT: psrlw $1, %xmm1 38436; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38437; SSE3-NEXT: psubb %xmm1, %xmm0 38438; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38439; SSE3-NEXT: movdqa %xmm0, %xmm2 38440; SSE3-NEXT: pand %xmm1, %xmm2 38441; SSE3-NEXT: psrlw $2, %xmm0 38442; SSE3-NEXT: pand %xmm1, %xmm0 38443; SSE3-NEXT: paddb %xmm2, %xmm0 38444; SSE3-NEXT: movdqa %xmm0, %xmm1 38445; SSE3-NEXT: psrlw $4, %xmm1 38446; SSE3-NEXT: paddb %xmm0, %xmm1 38447; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38448; SSE3-NEXT: pxor %xmm0, %xmm0 38449; SSE3-NEXT: psadbw %xmm1, %xmm0 38450; SSE3-NEXT: por {{.*}}(%rip), %xmm0 38451; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38452; SSE3-NEXT: movdqa %xmm0, %xmm2 38453; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 38454; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38455; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 38456; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38457; SSE3-NEXT: pand %xmm3, %xmm1 38458; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38459; SSE3-NEXT: por %xmm1, %xmm0 38460; SSE3-NEXT: retq 38461; 38462; SSSE3-LABEL: ugt_59_v2i64: 38463; SSSE3: # %bb.0: 38464; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38465; SSSE3-NEXT: movdqa %xmm0, %xmm2 38466; SSSE3-NEXT: pand %xmm1, %xmm2 38467; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38468; SSSE3-NEXT: movdqa %xmm3, %xmm4 38469; SSSE3-NEXT: pshufb %xmm2, %xmm4 38470; SSSE3-NEXT: psrlw $4, %xmm0 38471; SSSE3-NEXT: pand %xmm1, %xmm0 38472; SSSE3-NEXT: pshufb %xmm0, %xmm3 38473; SSSE3-NEXT: paddb %xmm4, %xmm3 38474; SSSE3-NEXT: pxor %xmm0, %xmm0 38475; SSSE3-NEXT: psadbw %xmm3, %xmm0 38476; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 38477; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38478; SSSE3-NEXT: movdqa %xmm0, %xmm2 38479; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 38480; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38481; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 38482; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38483; SSSE3-NEXT: pand %xmm3, %xmm1 38484; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38485; SSSE3-NEXT: por %xmm1, %xmm0 38486; SSSE3-NEXT: retq 38487; 38488; SSE41-LABEL: ugt_59_v2i64: 38489; SSE41: # %bb.0: 38490; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38491; SSE41-NEXT: movdqa %xmm0, %xmm2 38492; SSE41-NEXT: pand %xmm1, %xmm2 38493; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38494; SSE41-NEXT: movdqa %xmm3, %xmm4 38495; SSE41-NEXT: pshufb %xmm2, %xmm4 38496; SSE41-NEXT: psrlw $4, %xmm0 38497; SSE41-NEXT: pand %xmm1, %xmm0 38498; SSE41-NEXT: pshufb %xmm0, %xmm3 38499; SSE41-NEXT: paddb %xmm4, %xmm3 38500; SSE41-NEXT: pxor %xmm0, %xmm0 38501; SSE41-NEXT: psadbw %xmm3, %xmm0 38502; SSE41-NEXT: por {{.*}}(%rip), %xmm0 38503; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] 38504; SSE41-NEXT: movdqa %xmm0, %xmm2 38505; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 38506; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38507; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 38508; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38509; SSE41-NEXT: pand %xmm3, %xmm1 38510; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38511; SSE41-NEXT: por %xmm1, %xmm0 38512; SSE41-NEXT: retq 38513; 38514; AVX1-LABEL: ugt_59_v2i64: 38515; AVX1: # %bb.0: 
38516; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38517; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 38518; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38519; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38520; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 38521; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 38522; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38523; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38524; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 38525; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38526; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38527; AVX1-NEXT: retq 38528; 38529; AVX2-LABEL: ugt_59_v2i64: 38530; AVX2: # %bb.0: 38531; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38532; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 38533; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38534; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38535; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 38536; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 38537; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38538; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38539; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 38540; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38541; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38542; AVX2-NEXT: retq 38543; 38544; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64: 38545; AVX512VPOPCNTDQ: # %bb.0: 38546; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38547; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 38548; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38549; AVX512VPOPCNTDQ-NEXT: vzeroupper 38550; AVX512VPOPCNTDQ-NEXT: retq 38551; 38552; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64: 38553; AVX512VPOPCNTDQVL: # %bb.0: 38554; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 38555; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 38556; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38557; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38558; AVX512VPOPCNTDQVL-NEXT: retq 38559; 38560; 
BITALG_NOVLX-LABEL: ugt_59_v2i64: 38561; BITALG_NOVLX: # %bb.0: 38562; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38563; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 38564; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 38565; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38566; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38567; BITALG_NOVLX-NEXT: vzeroupper 38568; BITALG_NOVLX-NEXT: retq 38569; 38570; BITALG-LABEL: ugt_59_v2i64: 38571; BITALG: # %bb.0: 38572; BITALG-NEXT: vpopcntb %xmm0, %xmm0 38573; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 38574; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38575; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 38576; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38577; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38578; BITALG-NEXT: retq 38579 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 38580 %3 = icmp ugt <2 x i64> %2, <i64 59, i64 59> 38581 %4 = sext <2 x i1> %3 to <2 x i64> 38582 ret <2 x i64> %4 38583} 38584 38585define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { 38586; SSE2-LABEL: ult_60_v2i64: 38587; SSE2: # %bb.0: 38588; SSE2-NEXT: movdqa %xmm0, %xmm1 38589; SSE2-NEXT: psrlw $1, %xmm1 38590; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38591; SSE2-NEXT: psubb %xmm1, %xmm0 38592; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38593; SSE2-NEXT: movdqa %xmm0, %xmm2 38594; SSE2-NEXT: pand %xmm1, %xmm2 38595; SSE2-NEXT: psrlw $2, %xmm0 38596; SSE2-NEXT: pand %xmm1, %xmm0 38597; SSE2-NEXT: paddb %xmm2, %xmm0 38598; SSE2-NEXT: movdqa %xmm0, %xmm1 38599; SSE2-NEXT: psrlw $4, %xmm1 38600; SSE2-NEXT: paddb %xmm0, %xmm1 38601; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38602; SSE2-NEXT: pxor %xmm0, %xmm0 38603; SSE2-NEXT: psadbw %xmm1, %xmm0 38604; SSE2-NEXT: por {{.*}}(%rip), %xmm0 38605; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] 38606; SSE2-NEXT: movdqa %xmm1, %xmm2 38607; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 38608; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38609; 
SSE2-NEXT: pcmpeqd %xmm1, %xmm0 38610; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38611; SSE2-NEXT: pand %xmm3, %xmm1 38612; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38613; SSE2-NEXT: por %xmm1, %xmm0 38614; SSE2-NEXT: retq 38615; 38616; SSE3-LABEL: ult_60_v2i64: 38617; SSE3: # %bb.0: 38618; SSE3-NEXT: movdqa %xmm0, %xmm1 38619; SSE3-NEXT: psrlw $1, %xmm1 38620; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38621; SSE3-NEXT: psubb %xmm1, %xmm0 38622; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38623; SSE3-NEXT: movdqa %xmm0, %xmm2 38624; SSE3-NEXT: pand %xmm1, %xmm2 38625; SSE3-NEXT: psrlw $2, %xmm0 38626; SSE3-NEXT: pand %xmm1, %xmm0 38627; SSE3-NEXT: paddb %xmm2, %xmm0 38628; SSE3-NEXT: movdqa %xmm0, %xmm1 38629; SSE3-NEXT: psrlw $4, %xmm1 38630; SSE3-NEXT: paddb %xmm0, %xmm1 38631; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38632; SSE3-NEXT: pxor %xmm0, %xmm0 38633; SSE3-NEXT: psadbw %xmm1, %xmm0 38634; SSE3-NEXT: por {{.*}}(%rip), %xmm0 38635; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] 38636; SSE3-NEXT: movdqa %xmm1, %xmm2 38637; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 38638; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38639; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 38640; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38641; SSE3-NEXT: pand %xmm3, %xmm1 38642; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38643; SSE3-NEXT: por %xmm1, %xmm0 38644; SSE3-NEXT: retq 38645; 38646; SSSE3-LABEL: ult_60_v2i64: 38647; SSSE3: # %bb.0: 38648; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38649; SSSE3-NEXT: movdqa %xmm0, %xmm2 38650; SSSE3-NEXT: pand %xmm1, %xmm2 38651; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38652; SSSE3-NEXT: movdqa %xmm3, %xmm4 38653; SSSE3-NEXT: pshufb %xmm2, %xmm4 38654; SSSE3-NEXT: psrlw $4, %xmm0 38655; SSSE3-NEXT: pand %xmm1, %xmm0 38656; SSSE3-NEXT: pshufb %xmm0, %xmm3 38657; SSSE3-NEXT: paddb %xmm4, %xmm3 38658; SSSE3-NEXT: pxor %xmm0, 
%xmm0 38659; SSSE3-NEXT: psadbw %xmm3, %xmm0 38660; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 38661; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] 38662; SSSE3-NEXT: movdqa %xmm1, %xmm2 38663; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 38664; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38665; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 38666; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38667; SSSE3-NEXT: pand %xmm3, %xmm1 38668; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38669; SSSE3-NEXT: por %xmm1, %xmm0 38670; SSSE3-NEXT: retq 38671; 38672; SSE41-LABEL: ult_60_v2i64: 38673; SSE41: # %bb.0: 38674; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38675; SSE41-NEXT: movdqa %xmm0, %xmm2 38676; SSE41-NEXT: pand %xmm1, %xmm2 38677; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38678; SSE41-NEXT: movdqa %xmm3, %xmm4 38679; SSE41-NEXT: pshufb %xmm2, %xmm4 38680; SSE41-NEXT: psrlw $4, %xmm0 38681; SSE41-NEXT: pand %xmm1, %xmm0 38682; SSE41-NEXT: pshufb %xmm0, %xmm3 38683; SSE41-NEXT: paddb %xmm4, %xmm3 38684; SSE41-NEXT: pxor %xmm0, %xmm0 38685; SSE41-NEXT: psadbw %xmm3, %xmm0 38686; SSE41-NEXT: por {{.*}}(%rip), %xmm0 38687; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] 38688; SSE41-NEXT: movdqa %xmm1, %xmm2 38689; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 38690; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38691; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 38692; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38693; SSE41-NEXT: pand %xmm3, %xmm1 38694; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38695; SSE41-NEXT: por %xmm1, %xmm0 38696; SSE41-NEXT: retq 38697; 38698; AVX1-LABEL: ult_60_v2i64: 38699; AVX1: # %bb.0: 38700; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38701; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 38702; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38703; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38704; AVX1-NEXT: vpsrlw $4, %xmm0, 
%xmm0 38705; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 38706; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38707; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38708; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 38709; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38710; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] 38711; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38712; AVX1-NEXT: retq 38713; 38714; AVX2-LABEL: ult_60_v2i64: 38715; AVX2: # %bb.0: 38716; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38717; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 38718; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38719; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38720; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 38721; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 38722; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38723; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38724; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 38725; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38726; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] 38727; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38728; AVX2-NEXT: retq 38729; 38730; AVX512VPOPCNTDQ-LABEL: ult_60_v2i64: 38731; AVX512VPOPCNTDQ: # %bb.0: 38732; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38733; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 38734; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] 38735; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38736; AVX512VPOPCNTDQ-NEXT: vzeroupper 38737; AVX512VPOPCNTDQ-NEXT: retq 38738; 38739; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64: 38740; AVX512VPOPCNTDQVL: # %bb.0: 38741; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 38742; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 38743; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38744; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38745; AVX512VPOPCNTDQVL-NEXT: retq 38746; 38747; BITALG_NOVLX-LABEL: ult_60_v2i64: 38748; BITALG_NOVLX: # %bb.0: 38749; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38750; BITALG_NOVLX-NEXT: 
vpopcntb %zmm0, %zmm0 38751; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 38752; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38753; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] 38754; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 38755; BITALG_NOVLX-NEXT: vzeroupper 38756; BITALG_NOVLX-NEXT: retq 38757; 38758; BITALG-LABEL: ult_60_v2i64: 38759; BITALG: # %bb.0: 38760; BITALG-NEXT: vpopcntb %xmm0, %xmm0 38761; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 38762; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38763; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 38764; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38765; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38766; BITALG-NEXT: retq 38767 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 38768 %3 = icmp ult <2 x i64> %2, <i64 60, i64 60> 38769 %4 = sext <2 x i1> %3 to <2 x i64> 38770 ret <2 x i64> %4 38771} 38772 38773define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { 38774; SSE2-LABEL: ugt_60_v2i64: 38775; SSE2: # %bb.0: 38776; SSE2-NEXT: movdqa %xmm0, %xmm1 38777; SSE2-NEXT: psrlw $1, %xmm1 38778; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38779; SSE2-NEXT: psubb %xmm1, %xmm0 38780; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38781; SSE2-NEXT: movdqa %xmm0, %xmm2 38782; SSE2-NEXT: pand %xmm1, %xmm2 38783; SSE2-NEXT: psrlw $2, %xmm0 38784; SSE2-NEXT: pand %xmm1, %xmm0 38785; SSE2-NEXT: paddb %xmm2, %xmm0 38786; SSE2-NEXT: movdqa %xmm0, %xmm1 38787; SSE2-NEXT: psrlw $4, %xmm1 38788; SSE2-NEXT: paddb %xmm0, %xmm1 38789; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38790; SSE2-NEXT: pxor %xmm0, %xmm0 38791; SSE2-NEXT: psadbw %xmm1, %xmm0 38792; SSE2-NEXT: por {{.*}}(%rip), %xmm0 38793; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] 38794; SSE2-NEXT: movdqa %xmm0, %xmm2 38795; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 38796; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38797; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 38798; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38799; SSE2-NEXT: pand 
%xmm3, %xmm1 38800; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38801; SSE2-NEXT: por %xmm1, %xmm0 38802; SSE2-NEXT: retq 38803; 38804; SSE3-LABEL: ugt_60_v2i64: 38805; SSE3: # %bb.0: 38806; SSE3-NEXT: movdqa %xmm0, %xmm1 38807; SSE3-NEXT: psrlw $1, %xmm1 38808; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38809; SSE3-NEXT: psubb %xmm1, %xmm0 38810; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38811; SSE3-NEXT: movdqa %xmm0, %xmm2 38812; SSE3-NEXT: pand %xmm1, %xmm2 38813; SSE3-NEXT: psrlw $2, %xmm0 38814; SSE3-NEXT: pand %xmm1, %xmm0 38815; SSE3-NEXT: paddb %xmm2, %xmm0 38816; SSE3-NEXT: movdqa %xmm0, %xmm1 38817; SSE3-NEXT: psrlw $4, %xmm1 38818; SSE3-NEXT: paddb %xmm0, %xmm1 38819; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38820; SSE3-NEXT: pxor %xmm0, %xmm0 38821; SSE3-NEXT: psadbw %xmm1, %xmm0 38822; SSE3-NEXT: por {{.*}}(%rip), %xmm0 38823; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] 38824; SSE3-NEXT: movdqa %xmm0, %xmm2 38825; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 38826; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38827; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 38828; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38829; SSE3-NEXT: pand %xmm3, %xmm1 38830; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38831; SSE3-NEXT: por %xmm1, %xmm0 38832; SSE3-NEXT: retq 38833; 38834; SSSE3-LABEL: ugt_60_v2i64: 38835; SSSE3: # %bb.0: 38836; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38837; SSSE3-NEXT: movdqa %xmm0, %xmm2 38838; SSSE3-NEXT: pand %xmm1, %xmm2 38839; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38840; SSSE3-NEXT: movdqa %xmm3, %xmm4 38841; SSSE3-NEXT: pshufb %xmm2, %xmm4 38842; SSSE3-NEXT: psrlw $4, %xmm0 38843; SSSE3-NEXT: pand %xmm1, %xmm0 38844; SSSE3-NEXT: pshufb %xmm0, %xmm3 38845; SSSE3-NEXT: paddb %xmm4, %xmm3 38846; SSSE3-NEXT: pxor %xmm0, %xmm0 38847; SSSE3-NEXT: psadbw %xmm3, %xmm0 38848; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 38849; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483708,2147483708] 38850; SSSE3-NEXT: movdqa %xmm0, %xmm2 38851; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 38852; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38853; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 38854; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38855; SSSE3-NEXT: pand %xmm3, %xmm1 38856; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38857; SSSE3-NEXT: por %xmm1, %xmm0 38858; SSSE3-NEXT: retq 38859; 38860; SSE41-LABEL: ugt_60_v2i64: 38861; SSE41: # %bb.0: 38862; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38863; SSE41-NEXT: movdqa %xmm0, %xmm2 38864; SSE41-NEXT: pand %xmm1, %xmm2 38865; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38866; SSE41-NEXT: movdqa %xmm3, %xmm4 38867; SSE41-NEXT: pshufb %xmm2, %xmm4 38868; SSE41-NEXT: psrlw $4, %xmm0 38869; SSE41-NEXT: pand %xmm1, %xmm0 38870; SSE41-NEXT: pshufb %xmm0, %xmm3 38871; SSE41-NEXT: paddb %xmm4, %xmm3 38872; SSE41-NEXT: pxor %xmm0, %xmm0 38873; SSE41-NEXT: psadbw %xmm3, %xmm0 38874; SSE41-NEXT: por {{.*}}(%rip), %xmm0 38875; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] 38876; SSE41-NEXT: movdqa %xmm0, %xmm2 38877; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 38878; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38879; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 38880; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38881; SSE41-NEXT: pand %xmm3, %xmm1 38882; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38883; SSE41-NEXT: por %xmm1, %xmm0 38884; SSE41-NEXT: retq 38885; 38886; AVX1-LABEL: ugt_60_v2i64: 38887; AVX1: # %bb.0: 38888; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38889; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 38890; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38891; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38892; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 38893; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 38894; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38895; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 38896; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 38897; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38898; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38899; AVX1-NEXT: retq 38900; 38901; AVX2-LABEL: ugt_60_v2i64: 38902; AVX2: # %bb.0: 38903; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 38904; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 38905; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 38906; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 38907; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 38908; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 38909; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 38910; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 38911; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 38912; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38913; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38914; AVX2-NEXT: retq 38915; 38916; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64: 38917; AVX512VPOPCNTDQ: # %bb.0: 38918; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38919; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 38920; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38921; AVX512VPOPCNTDQ-NEXT: vzeroupper 38922; AVX512VPOPCNTDQ-NEXT: retq 38923; 38924; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64: 38925; AVX512VPOPCNTDQVL: # %bb.0: 38926; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 38927; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 38928; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38929; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38930; AVX512VPOPCNTDQVL-NEXT: retq 38931; 38932; BITALG_NOVLX-LABEL: ugt_60_v2i64: 38933; BITALG_NOVLX: # %bb.0: 38934; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 38935; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 38936; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 38937; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38938; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 38939; BITALG_NOVLX-NEXT: vzeroupper 38940; BITALG_NOVLX-NEXT: retq 
38941; 38942; BITALG-LABEL: ugt_60_v2i64: 38943; BITALG: # %bb.0: 38944; BITALG-NEXT: vpopcntb %xmm0, %xmm0 38945; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 38946; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 38947; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 38948; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 38949; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 38950; BITALG-NEXT: retq 38951 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 38952 %3 = icmp ugt <2 x i64> %2, <i64 60, i64 60> 38953 %4 = sext <2 x i1> %3 to <2 x i64> 38954 ret <2 x i64> %4 38955} 38956 38957define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { 38958; SSE2-LABEL: ult_61_v2i64: 38959; SSE2: # %bb.0: 38960; SSE2-NEXT: movdqa %xmm0, %xmm1 38961; SSE2-NEXT: psrlw $1, %xmm1 38962; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38963; SSE2-NEXT: psubb %xmm1, %xmm0 38964; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38965; SSE2-NEXT: movdqa %xmm0, %xmm2 38966; SSE2-NEXT: pand %xmm1, %xmm2 38967; SSE2-NEXT: psrlw $2, %xmm0 38968; SSE2-NEXT: pand %xmm1, %xmm0 38969; SSE2-NEXT: paddb %xmm2, %xmm0 38970; SSE2-NEXT: movdqa %xmm0, %xmm1 38971; SSE2-NEXT: psrlw $4, %xmm1 38972; SSE2-NEXT: paddb %xmm0, %xmm1 38973; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 38974; SSE2-NEXT: pxor %xmm0, %xmm0 38975; SSE2-NEXT: psadbw %xmm1, %xmm0 38976; SSE2-NEXT: por {{.*}}(%rip), %xmm0 38977; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 38978; SSE2-NEXT: movdqa %xmm1, %xmm2 38979; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 38980; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 38981; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 38982; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 38983; SSE2-NEXT: pand %xmm3, %xmm1 38984; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 38985; SSE2-NEXT: por %xmm1, %xmm0 38986; SSE2-NEXT: retq 38987; 38988; SSE3-LABEL: ult_61_v2i64: 38989; SSE3: # %bb.0: 38990; SSE3-NEXT: movdqa %xmm0, %xmm1 38991; SSE3-NEXT: psrlw $1, %xmm1 38992; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 38993; 
SSE3-NEXT: psubb %xmm1, %xmm0 38994; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 38995; SSE3-NEXT: movdqa %xmm0, %xmm2 38996; SSE3-NEXT: pand %xmm1, %xmm2 38997; SSE3-NEXT: psrlw $2, %xmm0 38998; SSE3-NEXT: pand %xmm1, %xmm0 38999; SSE3-NEXT: paddb %xmm2, %xmm0 39000; SSE3-NEXT: movdqa %xmm0, %xmm1 39001; SSE3-NEXT: psrlw $4, %xmm1 39002; SSE3-NEXT: paddb %xmm0, %xmm1 39003; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39004; SSE3-NEXT: pxor %xmm0, %xmm0 39005; SSE3-NEXT: psadbw %xmm1, %xmm0 39006; SSE3-NEXT: por {{.*}}(%rip), %xmm0 39007; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 39008; SSE3-NEXT: movdqa %xmm1, %xmm2 39009; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 39010; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39011; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 39012; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39013; SSE3-NEXT: pand %xmm3, %xmm1 39014; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39015; SSE3-NEXT: por %xmm1, %xmm0 39016; SSE3-NEXT: retq 39017; 39018; SSSE3-LABEL: ult_61_v2i64: 39019; SSSE3: # %bb.0: 39020; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39021; SSSE3-NEXT: movdqa %xmm0, %xmm2 39022; SSSE3-NEXT: pand %xmm1, %xmm2 39023; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39024; SSSE3-NEXT: movdqa %xmm3, %xmm4 39025; SSSE3-NEXT: pshufb %xmm2, %xmm4 39026; SSSE3-NEXT: psrlw $4, %xmm0 39027; SSSE3-NEXT: pand %xmm1, %xmm0 39028; SSSE3-NEXT: pshufb %xmm0, %xmm3 39029; SSSE3-NEXT: paddb %xmm4, %xmm3 39030; SSSE3-NEXT: pxor %xmm0, %xmm0 39031; SSSE3-NEXT: psadbw %xmm3, %xmm0 39032; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 39033; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 39034; SSSE3-NEXT: movdqa %xmm1, %xmm2 39035; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 39036; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39037; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 39038; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39039; SSSE3-NEXT: pand %xmm3, %xmm1 
39040; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39041; SSSE3-NEXT: por %xmm1, %xmm0 39042; SSSE3-NEXT: retq 39043; 39044; SSE41-LABEL: ult_61_v2i64: 39045; SSE41: # %bb.0: 39046; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39047; SSE41-NEXT: movdqa %xmm0, %xmm2 39048; SSE41-NEXT: pand %xmm1, %xmm2 39049; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39050; SSE41-NEXT: movdqa %xmm3, %xmm4 39051; SSE41-NEXT: pshufb %xmm2, %xmm4 39052; SSE41-NEXT: psrlw $4, %xmm0 39053; SSE41-NEXT: pand %xmm1, %xmm0 39054; SSE41-NEXT: pshufb %xmm0, %xmm3 39055; SSE41-NEXT: paddb %xmm4, %xmm3 39056; SSE41-NEXT: pxor %xmm0, %xmm0 39057; SSE41-NEXT: psadbw %xmm3, %xmm0 39058; SSE41-NEXT: por {{.*}}(%rip), %xmm0 39059; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 39060; SSE41-NEXT: movdqa %xmm1, %xmm2 39061; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 39062; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39063; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 39064; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39065; SSE41-NEXT: pand %xmm3, %xmm1 39066; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39067; SSE41-NEXT: por %xmm1, %xmm0 39068; SSE41-NEXT: retq 39069; 39070; AVX1-LABEL: ult_61_v2i64: 39071; AVX1: # %bb.0: 39072; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39073; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 39074; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39075; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39076; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 39077; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 39078; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39079; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39080; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 39081; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39082; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] 39083; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39084; AVX1-NEXT: retq 39085; 39086; AVX2-LABEL: ult_61_v2i64: 39087; AVX2: # %bb.0: 39088; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39089; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 39090; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39091; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39092; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 39093; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 39094; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39095; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39096; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 39097; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39098; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] 39099; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39100; AVX2-NEXT: retq 39101; 39102; AVX512VPOPCNTDQ-LABEL: ult_61_v2i64: 39103; AVX512VPOPCNTDQ: # %bb.0: 39104; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39105; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 39106; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] 39107; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39108; AVX512VPOPCNTDQ-NEXT: vzeroupper 39109; AVX512VPOPCNTDQ-NEXT: retq 39110; 39111; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64: 39112; AVX512VPOPCNTDQVL: # %bb.0: 39113; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 39114; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 39115; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39116; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39117; AVX512VPOPCNTDQVL-NEXT: retq 39118; 39119; BITALG_NOVLX-LABEL: ult_61_v2i64: 39120; BITALG_NOVLX: # %bb.0: 39121; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39122; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 39123; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 39124; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39125; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] 39126; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39127; BITALG_NOVLX-NEXT: vzeroupper 39128; BITALG_NOVLX-NEXT: retq 39129; 39130; BITALG-LABEL: ult_61_v2i64: 39131; BITALG: # %bb.0: 39132; BITALG-NEXT: vpopcntb %xmm0, %xmm0 39133; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 39134; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39135; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 39136; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39137; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39138; BITALG-NEXT: retq 39139 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 39140 %3 = icmp ult <2 x i64> %2, <i64 61, i64 61> 39141 %4 = sext <2 x i1> %3 to <2 x i64> 39142 ret <2 x i64> %4 39143} 39144 39145define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { 39146; SSE2-LABEL: ugt_61_v2i64: 39147; SSE2: # %bb.0: 39148; SSE2-NEXT: movdqa %xmm0, %xmm1 39149; SSE2-NEXT: psrlw $1, %xmm1 39150; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 39151; SSE2-NEXT: psubb %xmm1, %xmm0 39152; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39153; SSE2-NEXT: movdqa %xmm0, %xmm2 39154; SSE2-NEXT: pand %xmm1, %xmm2 39155; SSE2-NEXT: psrlw $2, %xmm0 39156; SSE2-NEXT: pand %xmm1, %xmm0 39157; SSE2-NEXT: paddb %xmm2, %xmm0 39158; SSE2-NEXT: movdqa %xmm0, %xmm1 39159; SSE2-NEXT: psrlw $4, %xmm1 39160; SSE2-NEXT: paddb %xmm0, %xmm1 39161; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 39162; SSE2-NEXT: pxor %xmm0, %xmm0 39163; SSE2-NEXT: psadbw %xmm1, %xmm0 39164; SSE2-NEXT: por {{.*}}(%rip), %xmm0 39165; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 39166; SSE2-NEXT: movdqa %xmm0, %xmm2 39167; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 39168; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39169; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 39170; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39171; SSE2-NEXT: pand %xmm3, %xmm1 39172; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39173; SSE2-NEXT: por %xmm1, %xmm0 39174; SSE2-NEXT: retq 39175; 39176; SSE3-LABEL: ugt_61_v2i64: 39177; SSE3: # %bb.0: 39178; SSE3-NEXT: movdqa %xmm0, %xmm1 39179; SSE3-NEXT: psrlw $1, %xmm1 39180; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39181; SSE3-NEXT: psubb %xmm1, %xmm0 39182; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39183; SSE3-NEXT: movdqa %xmm0, %xmm2 39184; SSE3-NEXT: pand %xmm1, %xmm2 39185; SSE3-NEXT: psrlw $2, %xmm0 39186; SSE3-NEXT: pand %xmm1, %xmm0 39187; SSE3-NEXT: paddb %xmm2, %xmm0 39188; SSE3-NEXT: movdqa %xmm0, %xmm1 39189; SSE3-NEXT: psrlw $4, %xmm1 39190; SSE3-NEXT: paddb %xmm0, %xmm1 39191; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39192; SSE3-NEXT: pxor %xmm0, %xmm0 39193; SSE3-NEXT: psadbw %xmm1, %xmm0 39194; SSE3-NEXT: por {{.*}}(%rip), %xmm0 39195; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 39196; SSE3-NEXT: movdqa %xmm0, %xmm2 39197; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 39198; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39199; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 39200; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39201; SSE3-NEXT: pand %xmm3, %xmm1 39202; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39203; SSE3-NEXT: por %xmm1, %xmm0 39204; SSE3-NEXT: retq 39205; 39206; SSSE3-LABEL: ugt_61_v2i64: 39207; SSSE3: # %bb.0: 39208; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39209; SSSE3-NEXT: movdqa %xmm0, %xmm2 39210; SSSE3-NEXT: pand %xmm1, %xmm2 39211; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39212; SSSE3-NEXT: movdqa %xmm3, %xmm4 39213; SSSE3-NEXT: pshufb %xmm2, %xmm4 39214; SSSE3-NEXT: psrlw $4, %xmm0 39215; SSSE3-NEXT: pand %xmm1, %xmm0 39216; SSSE3-NEXT: pshufb %xmm0, %xmm3 39217; SSSE3-NEXT: paddb %xmm4, %xmm3 39218; SSSE3-NEXT: pxor %xmm0, %xmm0 39219; SSSE3-NEXT: psadbw %xmm3, %xmm0 39220; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 39221; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 39222; SSSE3-NEXT: movdqa %xmm0, %xmm2 39223; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 39224; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39225; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 39226; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39227; SSSE3-NEXT: pand %xmm3, %xmm1 39228; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39229; 
SSSE3-NEXT: por %xmm1, %xmm0 39230; SSSE3-NEXT: retq 39231; 39232; SSE41-LABEL: ugt_61_v2i64: 39233; SSE41: # %bb.0: 39234; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39235; SSE41-NEXT: movdqa %xmm0, %xmm2 39236; SSE41-NEXT: pand %xmm1, %xmm2 39237; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39238; SSE41-NEXT: movdqa %xmm3, %xmm4 39239; SSE41-NEXT: pshufb %xmm2, %xmm4 39240; SSE41-NEXT: psrlw $4, %xmm0 39241; SSE41-NEXT: pand %xmm1, %xmm0 39242; SSE41-NEXT: pshufb %xmm0, %xmm3 39243; SSE41-NEXT: paddb %xmm4, %xmm3 39244; SSE41-NEXT: pxor %xmm0, %xmm0 39245; SSE41-NEXT: psadbw %xmm3, %xmm0 39246; SSE41-NEXT: por {{.*}}(%rip), %xmm0 39247; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] 39248; SSE41-NEXT: movdqa %xmm0, %xmm2 39249; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 39250; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39251; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 39252; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39253; SSE41-NEXT: pand %xmm3, %xmm1 39254; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39255; SSE41-NEXT: por %xmm1, %xmm0 39256; SSE41-NEXT: retq 39257; 39258; AVX1-LABEL: ugt_61_v2i64: 39259; AVX1: # %bb.0: 39260; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39261; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 39262; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39263; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39264; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 39265; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 39266; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39267; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39268; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 39269; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39270; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39271; AVX1-NEXT: retq 39272; 39273; AVX2-LABEL: ugt_61_v2i64: 39274; AVX2: # %bb.0: 39275; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39276; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 39277; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39278; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39279; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 39280; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 39281; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39282; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39283; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 39284; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39285; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39286; AVX2-NEXT: retq 39287; 39288; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64: 39289; AVX512VPOPCNTDQ: # %bb.0: 39290; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39291; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 39292; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39293; AVX512VPOPCNTDQ-NEXT: vzeroupper 39294; AVX512VPOPCNTDQ-NEXT: retq 39295; 39296; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64: 39297; AVX512VPOPCNTDQVL: # %bb.0: 39298; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 39299; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 39300; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39301; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39302; AVX512VPOPCNTDQVL-NEXT: retq 39303; 39304; BITALG_NOVLX-LABEL: ugt_61_v2i64: 39305; BITALG_NOVLX: # %bb.0: 39306; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39307; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 39308; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 39309; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39310; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39311; BITALG_NOVLX-NEXT: vzeroupper 39312; BITALG_NOVLX-NEXT: retq 39313; 39314; BITALG-LABEL: ugt_61_v2i64: 39315; BITALG: # %bb.0: 39316; BITALG-NEXT: vpopcntb %xmm0, %xmm0 39317; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 39318; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39319; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 39320; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39321; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39322; 
BITALG-NEXT: retq 39323 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 39324 %3 = icmp ugt <2 x i64> %2, <i64 61, i64 61> 39325 %4 = sext <2 x i1> %3 to <2 x i64> 39326 ret <2 x i64> %4 39327} 39328 39329define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { 39330; SSE2-LABEL: ult_62_v2i64: 39331; SSE2: # %bb.0: 39332; SSE2-NEXT: movdqa %xmm0, %xmm1 39333; SSE2-NEXT: psrlw $1, %xmm1 39334; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 39335; SSE2-NEXT: psubb %xmm1, %xmm0 39336; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39337; SSE2-NEXT: movdqa %xmm0, %xmm2 39338; SSE2-NEXT: pand %xmm1, %xmm2 39339; SSE2-NEXT: psrlw $2, %xmm0 39340; SSE2-NEXT: pand %xmm1, %xmm0 39341; SSE2-NEXT: paddb %xmm2, %xmm0 39342; SSE2-NEXT: movdqa %xmm0, %xmm1 39343; SSE2-NEXT: psrlw $4, %xmm1 39344; SSE2-NEXT: paddb %xmm0, %xmm1 39345; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 39346; SSE2-NEXT: pxor %xmm0, %xmm0 39347; SSE2-NEXT: psadbw %xmm1, %xmm0 39348; SSE2-NEXT: por {{.*}}(%rip), %xmm0 39349; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39350; SSE2-NEXT: movdqa %xmm1, %xmm2 39351; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 39352; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39353; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 39354; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39355; SSE2-NEXT: pand %xmm3, %xmm1 39356; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39357; SSE2-NEXT: por %xmm1, %xmm0 39358; SSE2-NEXT: retq 39359; 39360; SSE3-LABEL: ult_62_v2i64: 39361; SSE3: # %bb.0: 39362; SSE3-NEXT: movdqa %xmm0, %xmm1 39363; SSE3-NEXT: psrlw $1, %xmm1 39364; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39365; SSE3-NEXT: psubb %xmm1, %xmm0 39366; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39367; SSE3-NEXT: movdqa %xmm0, %xmm2 39368; SSE3-NEXT: pand %xmm1, %xmm2 39369; SSE3-NEXT: psrlw $2, %xmm0 39370; SSE3-NEXT: pand %xmm1, %xmm0 39371; SSE3-NEXT: paddb %xmm2, %xmm0 39372; SSE3-NEXT: movdqa %xmm0, %xmm1 39373; SSE3-NEXT: psrlw $4, 
%xmm1 39374; SSE3-NEXT: paddb %xmm0, %xmm1 39375; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39376; SSE3-NEXT: pxor %xmm0, %xmm0 39377; SSE3-NEXT: psadbw %xmm1, %xmm0 39378; SSE3-NEXT: por {{.*}}(%rip), %xmm0 39379; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39380; SSE3-NEXT: movdqa %xmm1, %xmm2 39381; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 39382; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39383; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 39384; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39385; SSE3-NEXT: pand %xmm3, %xmm1 39386; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39387; SSE3-NEXT: por %xmm1, %xmm0 39388; SSE3-NEXT: retq 39389; 39390; SSSE3-LABEL: ult_62_v2i64: 39391; SSSE3: # %bb.0: 39392; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39393; SSSE3-NEXT: movdqa %xmm0, %xmm2 39394; SSSE3-NEXT: pand %xmm1, %xmm2 39395; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39396; SSSE3-NEXT: movdqa %xmm3, %xmm4 39397; SSSE3-NEXT: pshufb %xmm2, %xmm4 39398; SSSE3-NEXT: psrlw $4, %xmm0 39399; SSSE3-NEXT: pand %xmm1, %xmm0 39400; SSSE3-NEXT: pshufb %xmm0, %xmm3 39401; SSSE3-NEXT: paddb %xmm4, %xmm3 39402; SSSE3-NEXT: pxor %xmm0, %xmm0 39403; SSSE3-NEXT: psadbw %xmm3, %xmm0 39404; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 39405; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39406; SSSE3-NEXT: movdqa %xmm1, %xmm2 39407; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 39408; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39409; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 39410; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39411; SSSE3-NEXT: pand %xmm3, %xmm1 39412; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39413; SSSE3-NEXT: por %xmm1, %xmm0 39414; SSSE3-NEXT: retq 39415; 39416; SSE41-LABEL: ult_62_v2i64: 39417; SSE41: # %bb.0: 39418; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39419; SSE41-NEXT: movdqa %xmm0, %xmm2 39420; SSE41-NEXT: pand %xmm1, %xmm2 39421; SSE41-NEXT: 
movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39422; SSE41-NEXT: movdqa %xmm3, %xmm4 39423; SSE41-NEXT: pshufb %xmm2, %xmm4 39424; SSE41-NEXT: psrlw $4, %xmm0 39425; SSE41-NEXT: pand %xmm1, %xmm0 39426; SSE41-NEXT: pshufb %xmm0, %xmm3 39427; SSE41-NEXT: paddb %xmm4, %xmm3 39428; SSE41-NEXT: pxor %xmm0, %xmm0 39429; SSE41-NEXT: psadbw %xmm3, %xmm0 39430; SSE41-NEXT: por {{.*}}(%rip), %xmm0 39431; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39432; SSE41-NEXT: movdqa %xmm1, %xmm2 39433; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 39434; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39435; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 39436; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39437; SSE41-NEXT: pand %xmm3, %xmm1 39438; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39439; SSE41-NEXT: por %xmm1, %xmm0 39440; SSE41-NEXT: retq 39441; 39442; AVX1-LABEL: ult_62_v2i64: 39443; AVX1: # %bb.0: 39444; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39445; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 39446; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39447; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39448; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 39449; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 39450; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39451; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39452; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 39453; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39454; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] 39455; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39456; AVX1-NEXT: retq 39457; 39458; AVX2-LABEL: ult_62_v2i64: 39459; AVX2: # %bb.0: 39460; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39461; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 39462; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39463; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39464; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 39465; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 39466; AVX2-NEXT: vpshufb %xmm0, 
%xmm3, %xmm0 39467; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39468; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 39469; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39470; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] 39471; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39472; AVX2-NEXT: retq 39473; 39474; AVX512VPOPCNTDQ-LABEL: ult_62_v2i64: 39475; AVX512VPOPCNTDQ: # %bb.0: 39476; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39477; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 39478; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] 39479; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39480; AVX512VPOPCNTDQ-NEXT: vzeroupper 39481; AVX512VPOPCNTDQ-NEXT: retq 39482; 39483; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64: 39484; AVX512VPOPCNTDQVL: # %bb.0: 39485; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 39486; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 39487; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39488; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39489; AVX512VPOPCNTDQVL-NEXT: retq 39490; 39491; BITALG_NOVLX-LABEL: ult_62_v2i64: 39492; BITALG_NOVLX: # %bb.0: 39493; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39494; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 39495; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 39496; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39497; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] 39498; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39499; BITALG_NOVLX-NEXT: vzeroupper 39500; BITALG_NOVLX-NEXT: retq 39501; 39502; BITALG-LABEL: ult_62_v2i64: 39503; BITALG: # %bb.0: 39504; BITALG-NEXT: vpopcntb %xmm0, %xmm0 39505; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 39506; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39507; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 39508; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39509; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39510; BITALG-NEXT: retq 39511 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 39512 %3 = icmp ult <2 x i64> %2, <i64 
62, i64 62> 39513 %4 = sext <2 x i1> %3 to <2 x i64> 39514 ret <2 x i64> %4 39515} 39516 39517define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { 39518; SSE2-LABEL: ugt_62_v2i64: 39519; SSE2: # %bb.0: 39520; SSE2-NEXT: movdqa %xmm0, %xmm1 39521; SSE2-NEXT: psrlw $1, %xmm1 39522; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 39523; SSE2-NEXT: psubb %xmm1, %xmm0 39524; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39525; SSE2-NEXT: movdqa %xmm0, %xmm2 39526; SSE2-NEXT: pand %xmm1, %xmm2 39527; SSE2-NEXT: psrlw $2, %xmm0 39528; SSE2-NEXT: pand %xmm1, %xmm0 39529; SSE2-NEXT: paddb %xmm2, %xmm0 39530; SSE2-NEXT: movdqa %xmm0, %xmm1 39531; SSE2-NEXT: psrlw $4, %xmm1 39532; SSE2-NEXT: paddb %xmm0, %xmm1 39533; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 39534; SSE2-NEXT: pxor %xmm0, %xmm0 39535; SSE2-NEXT: psadbw %xmm1, %xmm0 39536; SSE2-NEXT: por {{.*}}(%rip), %xmm0 39537; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39538; SSE2-NEXT: movdqa %xmm0, %xmm2 39539; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 39540; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39541; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 39542; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39543; SSE2-NEXT: pand %xmm3, %xmm1 39544; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39545; SSE2-NEXT: por %xmm1, %xmm0 39546; SSE2-NEXT: retq 39547; 39548; SSE3-LABEL: ugt_62_v2i64: 39549; SSE3: # %bb.0: 39550; SSE3-NEXT: movdqa %xmm0, %xmm1 39551; SSE3-NEXT: psrlw $1, %xmm1 39552; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39553; SSE3-NEXT: psubb %xmm1, %xmm0 39554; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39555; SSE3-NEXT: movdqa %xmm0, %xmm2 39556; SSE3-NEXT: pand %xmm1, %xmm2 39557; SSE3-NEXT: psrlw $2, %xmm0 39558; SSE3-NEXT: pand %xmm1, %xmm0 39559; SSE3-NEXT: paddb %xmm2, %xmm0 39560; SSE3-NEXT: movdqa %xmm0, %xmm1 39561; SSE3-NEXT: psrlw $4, %xmm1 39562; SSE3-NEXT: paddb %xmm0, %xmm1 39563; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39564; SSE3-NEXT: pxor %xmm0, 
%xmm0 39565; SSE3-NEXT: psadbw %xmm1, %xmm0 39566; SSE3-NEXT: por {{.*}}(%rip), %xmm0 39567; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39568; SSE3-NEXT: movdqa %xmm0, %xmm2 39569; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 39570; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39571; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 39572; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39573; SSE3-NEXT: pand %xmm3, %xmm1 39574; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39575; SSE3-NEXT: por %xmm1, %xmm0 39576; SSE3-NEXT: retq 39577; 39578; SSSE3-LABEL: ugt_62_v2i64: 39579; SSSE3: # %bb.0: 39580; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39581; SSSE3-NEXT: movdqa %xmm0, %xmm2 39582; SSSE3-NEXT: pand %xmm1, %xmm2 39583; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39584; SSSE3-NEXT: movdqa %xmm3, %xmm4 39585; SSSE3-NEXT: pshufb %xmm2, %xmm4 39586; SSSE3-NEXT: psrlw $4, %xmm0 39587; SSSE3-NEXT: pand %xmm1, %xmm0 39588; SSSE3-NEXT: pshufb %xmm0, %xmm3 39589; SSSE3-NEXT: paddb %xmm4, %xmm3 39590; SSSE3-NEXT: pxor %xmm0, %xmm0 39591; SSSE3-NEXT: psadbw %xmm3, %xmm0 39592; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 39593; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39594; SSSE3-NEXT: movdqa %xmm0, %xmm2 39595; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 39596; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39597; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 39598; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39599; SSSE3-NEXT: pand %xmm3, %xmm1 39600; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39601; SSSE3-NEXT: por %xmm1, %xmm0 39602; SSSE3-NEXT: retq 39603; 39604; SSE41-LABEL: ugt_62_v2i64: 39605; SSE41: # %bb.0: 39606; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39607; SSE41-NEXT: movdqa %xmm0, %xmm2 39608; SSE41-NEXT: pand %xmm1, %xmm2 39609; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39610; SSE41-NEXT: movdqa %xmm3, %xmm4 39611; SSE41-NEXT: 
pshufb %xmm2, %xmm4 39612; SSE41-NEXT: psrlw $4, %xmm0 39613; SSE41-NEXT: pand %xmm1, %xmm0 39614; SSE41-NEXT: pshufb %xmm0, %xmm3 39615; SSE41-NEXT: paddb %xmm4, %xmm3 39616; SSE41-NEXT: pxor %xmm0, %xmm0 39617; SSE41-NEXT: psadbw %xmm3, %xmm0 39618; SSE41-NEXT: por {{.*}}(%rip), %xmm0 39619; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] 39620; SSE41-NEXT: movdqa %xmm0, %xmm2 39621; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 39622; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39623; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 39624; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39625; SSE41-NEXT: pand %xmm3, %xmm1 39626; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39627; SSE41-NEXT: por %xmm1, %xmm0 39628; SSE41-NEXT: retq 39629; 39630; AVX1-LABEL: ugt_62_v2i64: 39631; AVX1: # %bb.0: 39632; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39633; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 39634; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39635; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39636; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 39637; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 39638; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39639; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39640; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 39641; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39642; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39643; AVX1-NEXT: retq 39644; 39645; AVX2-LABEL: ugt_62_v2i64: 39646; AVX2: # %bb.0: 39647; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39648; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 39649; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39650; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39651; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 39652; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 39653; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39654; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39655; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 39656; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39657; 
AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39658; AVX2-NEXT: retq 39659; 39660; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64: 39661; AVX512VPOPCNTDQ: # %bb.0: 39662; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39663; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 39664; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39665; AVX512VPOPCNTDQ-NEXT: vzeroupper 39666; AVX512VPOPCNTDQ-NEXT: retq 39667; 39668; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64: 39669; AVX512VPOPCNTDQVL: # %bb.0: 39670; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 39671; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 39672; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39673; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39674; AVX512VPOPCNTDQVL-NEXT: retq 39675; 39676; BITALG_NOVLX-LABEL: ugt_62_v2i64: 39677; BITALG_NOVLX: # %bb.0: 39678; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39679; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 39680; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 39681; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39682; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 39683; BITALG_NOVLX-NEXT: vzeroupper 39684; BITALG_NOVLX-NEXT: retq 39685; 39686; BITALG-LABEL: ugt_62_v2i64: 39687; BITALG: # %bb.0: 39688; BITALG-NEXT: vpopcntb %xmm0, %xmm0 39689; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 39690; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39691; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 39692; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39693; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39694; BITALG-NEXT: retq 39695 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 39696 %3 = icmp ugt <2 x i64> %2, <i64 62, i64 62> 39697 %4 = sext <2 x i1> %3 to <2 x i64> 39698 ret <2 x i64> %4 39699} 39700 39701define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { 39702; SSE2-LABEL: ult_63_v2i64: 39703; SSE2: # %bb.0: 39704; SSE2-NEXT: movdqa %xmm0, %xmm1 39705; SSE2-NEXT: psrlw $1, %xmm1 39706; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 39707; SSE2-NEXT: psubb %xmm1, %xmm0 39708; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39709; SSE2-NEXT: movdqa %xmm0, %xmm2 39710; SSE2-NEXT: pand %xmm1, %xmm2 39711; SSE2-NEXT: psrlw $2, %xmm0 39712; SSE2-NEXT: pand %xmm1, %xmm0 39713; SSE2-NEXT: paddb %xmm2, %xmm0 39714; SSE2-NEXT: movdqa %xmm0, %xmm1 39715; SSE2-NEXT: psrlw $4, %xmm1 39716; SSE2-NEXT: paddb %xmm0, %xmm1 39717; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 39718; SSE2-NEXT: pxor %xmm0, %xmm0 39719; SSE2-NEXT: psadbw %xmm1, %xmm0 39720; SSE2-NEXT: por {{.*}}(%rip), %xmm0 39721; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] 39722; SSE2-NEXT: movdqa %xmm1, %xmm2 39723; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 39724; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39725; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 39726; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39727; SSE2-NEXT: pand %xmm3, %xmm1 39728; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39729; SSE2-NEXT: por %xmm1, %xmm0 39730; SSE2-NEXT: retq 39731; 39732; SSE3-LABEL: ult_63_v2i64: 39733; SSE3: # %bb.0: 39734; SSE3-NEXT: movdqa %xmm0, %xmm1 39735; SSE3-NEXT: psrlw $1, %xmm1 39736; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39737; SSE3-NEXT: psubb %xmm1, %xmm0 39738; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 39739; SSE3-NEXT: movdqa %xmm0, %xmm2 39740; SSE3-NEXT: pand %xmm1, %xmm2 39741; SSE3-NEXT: psrlw $2, %xmm0 39742; SSE3-NEXT: pand %xmm1, %xmm0 39743; SSE3-NEXT: paddb %xmm2, %xmm0 39744; SSE3-NEXT: movdqa %xmm0, %xmm1 39745; SSE3-NEXT: psrlw $4, %xmm1 39746; SSE3-NEXT: paddb %xmm0, %xmm1 39747; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 39748; SSE3-NEXT: pxor %xmm0, %xmm0 39749; SSE3-NEXT: psadbw %xmm1, %xmm0 39750; SSE3-NEXT: por {{.*}}(%rip), %xmm0 39751; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] 39752; SSE3-NEXT: movdqa %xmm1, %xmm2 39753; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 39754; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39755; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm0 39756; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39757; SSE3-NEXT: pand %xmm3, %xmm1 39758; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39759; SSE3-NEXT: por %xmm1, %xmm0 39760; SSE3-NEXT: retq 39761; 39762; SSSE3-LABEL: ult_63_v2i64: 39763; SSSE3: # %bb.0: 39764; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39765; SSSE3-NEXT: movdqa %xmm0, %xmm2 39766; SSSE3-NEXT: pand %xmm1, %xmm2 39767; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39768; SSSE3-NEXT: movdqa %xmm3, %xmm4 39769; SSSE3-NEXT: pshufb %xmm2, %xmm4 39770; SSSE3-NEXT: psrlw $4, %xmm0 39771; SSSE3-NEXT: pand %xmm1, %xmm0 39772; SSSE3-NEXT: pshufb %xmm0, %xmm3 39773; SSSE3-NEXT: paddb %xmm4, %xmm3 39774; SSSE3-NEXT: pxor %xmm0, %xmm0 39775; SSSE3-NEXT: psadbw %xmm3, %xmm0 39776; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 39777; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] 39778; SSSE3-NEXT: movdqa %xmm1, %xmm2 39779; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 39780; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39781; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 39782; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39783; SSSE3-NEXT: pand %xmm3, %xmm1 39784; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39785; SSSE3-NEXT: por %xmm1, %xmm0 39786; SSSE3-NEXT: retq 39787; 39788; SSE41-LABEL: ult_63_v2i64: 39789; SSE41: # %bb.0: 39790; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39791; SSE41-NEXT: movdqa %xmm0, %xmm2 39792; SSE41-NEXT: pand %xmm1, %xmm2 39793; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39794; SSE41-NEXT: movdqa %xmm3, %xmm4 39795; SSE41-NEXT: pshufb %xmm2, %xmm4 39796; SSE41-NEXT: psrlw $4, %xmm0 39797; SSE41-NEXT: pand %xmm1, %xmm0 39798; SSE41-NEXT: pshufb %xmm0, %xmm3 39799; SSE41-NEXT: paddb %xmm4, %xmm3 39800; SSE41-NEXT: pxor %xmm0, %xmm0 39801; SSE41-NEXT: psadbw %xmm3, %xmm0 39802; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
39803; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] 39804; SSE41-NEXT: movdqa %xmm1, %xmm2 39805; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 39806; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 39807; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 39808; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 39809; SSE41-NEXT: pand %xmm3, %xmm1 39810; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 39811; SSE41-NEXT: por %xmm1, %xmm0 39812; SSE41-NEXT: retq 39813; 39814; AVX1-LABEL: ult_63_v2i64: 39815; AVX1: # %bb.0: 39816; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39817; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 39818; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39819; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39820; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 39821; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 39822; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39823; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39824; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 39825; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39826; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] 39827; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39828; AVX1-NEXT: retq 39829; 39830; AVX2-LABEL: ult_63_v2i64: 39831; AVX2: # %bb.0: 39832; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 39833; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 39834; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 39835; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 39836; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 39837; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 39838; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 39839; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 39840; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 39841; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39842; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] 39843; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39844; AVX2-NEXT: retq 39845; 39846; AVX512VPOPCNTDQ-LABEL: ult_63_v2i64: 39847; AVX512VPOPCNTDQ: # %bb.0: 39848; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 39849; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 39850; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] 39851; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39852; AVX512VPOPCNTDQ-NEXT: vzeroupper 39853; AVX512VPOPCNTDQ-NEXT: retq 39854; 39855; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64: 39856; AVX512VPOPCNTDQVL: # %bb.0: 39857; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 39858; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 39859; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39860; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39861; AVX512VPOPCNTDQVL-NEXT: retq 39862; 39863; BITALG_NOVLX-LABEL: ult_63_v2i64: 39864; BITALG_NOVLX: # %bb.0: 39865; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 39866; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 39867; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 39868; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39869; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] 39870; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 39871; BITALG_NOVLX-NEXT: vzeroupper 39872; BITALG_NOVLX-NEXT: retq 39873; 39874; BITALG-LABEL: ult_63_v2i64: 39875; BITALG: # %bb.0: 39876; BITALG-NEXT: vpopcntb %xmm0, %xmm0 39877; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 39878; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 39879; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 39880; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 39881; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 39882; BITALG-NEXT: retq 39883 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) 39884 %3 = icmp ult <2 x i64> %2, <i64 63, i64 63> 39885 %4 = sext <2 x i1> %3 to <2 x i64> 39886 ret <2 x i64> %4 39887} 39888 39889declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) 39890declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) 39891declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) 39892declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) 39893