; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701

; The 'isPositive' idiom appears in IR as an arithmetic shift right by
; (bitwidth - 1) followed by a 'not' (xor with all-ones). The expected
; lowering is a single packed signed compare-greater-than against an
; all-ones vector, materialized cheaply with a pcmpeq of a register
; against itself.

define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}

define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}

define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}

; SSE2 has no 64-bit element compare, so the sign bits are produced with a
; 32-bit shift plus a shuffle of the odd (high) dwords, then inverted.
define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # BB#0:
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}

; <1 x i128> is not a legal type, so this is lowered through scalar
; registers (the i128 arrives in rdi:rsi); the same idiom must still
; produce correct code rather than crash.
define <1 x i128> @test_strange_type(<1 x i128> %x) {
; SSE2-LABEL: test_strange_type:
; SSE2:       # BB#0:
; SSE2-NEXT:    sarq $63, %rsi
; SSE2-NEXT:    movd %rsi, %xmm0
; SSE2-NEXT:    notq %rsi
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    movq %rsi, %rdx
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_strange_type:
; SSE42:       # BB#0:
; SSE42-NEXT:    sarq $63, %rsi
; SSE42-NEXT:    movd %rsi, %xmm0
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    movd %xmm1, %rax
; SSE42-NEXT:    pextrq $1, %xmm1, %rdx
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_strange_type:
; AVX1:       # BB#0:
; AVX1-NEXT:    sarq $63, %rsi
; AVX1-NEXT:    vmovq %rsi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_strange_type:
; AVX2:       # BB#0:
; AVX2-NEXT:    sarq $63, %rsi
; AVX2-NEXT:    vmovq %rsi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX2-NEXT:    retq
;
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}

; 256-bit versions of the same idiom. Without AVX2 integer ops, the
; compares are split into 128-bit halves via vextractf128/vinsertf128.
define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}

define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}

define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}

define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # BB#0:
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}

; The following tests check lowering of a compare whose i1 result is
; zero-extended. The expected code is the packed compare followed by a
; logical right shift of each lane (or an AND with a splat of 1) to
; turn the all-ones mask into 0/1 values.

define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}

define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}

define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}

; SSE2 has no 64-bit pcmpeqq, so equality is emulated with a 32-bit
; compare plus a shuffle that ANDs the two dword results of each qword.
define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}

; i8 lanes have no packed shift, so the mask is reduced to 0/1 with an
; AND against a splat of 1 instead of a logical shift.
define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}

define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}

define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}

; SSE2 has no 64-bit signed compare, so it is emulated with a sign-bit
; flip (xor with the 0x80000000 splat) and a dword pcmpgtd/pcmpeqd
; combination before the final mask-to-0/1 AND.
define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}