; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

; Codegen test for packing the low bit of every lane of a <4 x i32> into a
; 4-bit integer mask. Each function is compiled for i686 and x86-64 with SSE2
; disabled, with SSE2 enabled and with AVX2 enabled. The two SSE2 invocations
; share one check prefix, as do the two AVX2 invocations, so the vector output
; must be the same on both triples (the return is matched as retl or retq).
;
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Form 1 of the mask computation: truncate the lanes to <4 x i1>, bitcast the
; whole vector to i4 and zero-extend it to i32.
; With SSE2 or AVX2 the checks expect a left shift by 31 (moving each lane's
; low bit into the sign position) followed by a single movmskps. Without SSE2
; the checks expect the mask to be assembled from the four scalar arguments
; using byte-sized and/add/or/shift operations.
; NOTE(review): the function names presumably refer to LLVM bug report PR15215,
; with _bad/_good describing codegen quality when it was filed - confirm
; against the bug tracker. The vector checks for both forms are identical here.
define i32 @PR15215_bad(<4 x i32> %input) {
; X86-LABEL: PR15215_bad:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
; X86-NEXT:    addb %ah, %ah
; X86-NEXT:    andb $1, %dl
; X86-NEXT:    orb %ah, %dl
; X86-NEXT:    shlb $2, %dl
; X86-NEXT:    addb %cl, %cl
; X86-NEXT:    andb $1, %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    andb $3, %al
; X86-NEXT:    orb %dl, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    andl $15, %eax
; X86-NEXT:    retl
;
; X64-LABEL: PR15215_bad:
; X64:       # %bb.0: # %entry
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    andb $1, %dl
; X64-NEXT:    orb %cl, %dl
; X64-NEXT:    shlb $2, %dl
; X64-NEXT:    addb %sil, %sil
; X64-NEXT:    andb $1, %dil
; X64-NEXT:    orb %sil, %dil
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    orb %dl, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    retq
;
; SSE2-LABEL: PR15215_bad:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: PR15215_bad:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskps %xmm0, %eax
; AVX2-NEXT:    ret{{[l|q]}}
entry:
  %0 = trunc <4 x i32> %input to <4 x i1>
  %1 = bitcast <4 x i1> %0 to i4
  %2 = zext i4 %1 to i32
  ret i32 %2
}

; Form 2 of the same mask computation, spelled out lane by lane: extract each
; i1 lane, select the matching power of two (1, 2, 4, 8) or 0, and OR the four
; results together, so lane N contributes bit N of the returned i32.
; With SSE2 or AVX2 the checks expect the same shift + movmskps sequence as for
; the bitcast form. Without SSE2 the checks expect four "and $1" operations
; combined with scaled lea instructions (scales 2, 4 and 8).
define i32 @PR15215_good(<4 x i32> %input) {
; X86-LABEL: PR15215_good:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    andl $1, %esi
; X86-NEXT:    andl $1, %edx
; X86-NEXT:    andl $1, %ecx
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    leal (%esi,%edx,2), %edx
; X86-NEXT:    leal (%edx,%ecx,4), %ecx
; X86-NEXT:    leal (%ecx,%eax,8), %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: PR15215_good:
; X64:       # %bb.0: # %entry
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    # kill: def $edx killed $edx def $rdx
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    andl $1, %edi
; X64-NEXT:    andl $1, %esi
; X64-NEXT:    andl $1, %edx
; X64-NEXT:    andl $1, %ecx
; X64-NEXT:    leal (%rdi,%rsi,2), %eax
; X64-NEXT:    leal (%rax,%rdx,4), %eax
; X64-NEXT:    leal (%rax,%rcx,8), %eax
; X64-NEXT:    retq
;
; SSE2-LABEL: PR15215_good:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: PR15215_good:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskps %xmm0, %eax
; AVX2-NEXT:    ret{{[l|q]}}
entry:
  %0 = trunc <4 x i32> %input to <4 x i1>
  %1 = extractelement <4 x i1> %0, i32 0
  %e1 = select i1 %1, i32 1, i32 0
  %2 = extractelement <4 x i1> %0, i32 1
  %e2 = select i1 %2, i32 2, i32 0
  %3 = extractelement <4 x i1> %0, i32 2
  %e3 = select i1 %3, i32 4, i32 0
  %4 = extractelement <4 x i1> %0, i32 3
  %e4 = select i1 %4, i32 8, i32 0
  %5 = or i32 %e1, %e2
  %6 = or i32 %5, %e3
  %7 = or i32 %6, %e4
  ret i32 %7
}