; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

define i32 @test0(<1 x i64>* %v4) nounwind {
; X32-LABEL: test0:
; X32:       # BB#0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $24, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    pshufw $238, (%esp), %mm0 # mm0 = mem[2,3,2,3]
; X32-NEXT:    movq %mm0, {{[0-9]+}}(%esp)
; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT:    movd %xmm0, %eax
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: test0:
; X64:       # BB#0: # %entry
; X64-NEXT:    pshufw $238, (%rdi), %mm0 # mm0 = mem[2,3,2,3]
; X64-NEXT:    movd %mm0, %eax
; X64-NEXT:    addl $32, %eax
; X64-NEXT:    retq
entry:
  %v5 = load <1 x i64>, <1 x i64>* %v4, align 8
  %v12 = bitcast <1 x i64> %v5 to <4 x i16>
  %v13 = bitcast <4 x i16> %v12 to x86_mmx
  %v14 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v13, i8 -18)
  %v15 = bitcast x86_mmx %v14 to <4 x i16>
  %v16 = bitcast <4 x i16> %v15 to <1 x i64>
  %v17 = extractelement <1 x i64> %v16, i32 0
  %v18 = bitcast i64 %v17 to <2 x i32>
  %v19 = extractelement <2 x i32> %v18, i32 0
  %v20 = add i32 %v19, 32
  ret i32 %v20
}

define i32 @test1(i32* nocapture readonly %ptr) nounwind {
; X32-LABEL: test1:
; X32:       # BB#0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movd (%eax), %mm0
; X32-NEXT:    pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
; X32-NEXT:    movq %mm0, (%esp)
; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT:    movd %xmm0, %eax
; X32-NEXT:    emms
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # BB#0: # %entry
; X64-NEXT:    movd (%rdi), %mm0
; X64-NEXT:    pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
; X64-NEXT:    movd %mm0, %eax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %0 = load i32, i32* %ptr, align 4
  %1 = insertelement <2 x i32> undef, i32 %0, i32 0
  %2 = insertelement <2 x i32> %1, i32 0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = bitcast x86_mmx %3 to i64
  %5 = bitcast i64 %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %6, i8 -24)
  %8 = bitcast x86_mmx %7 to <4 x i16>
  %9 = bitcast <4 x i16> %8 to <1 x i64>
  %10 = extractelement <1 x i64> %9, i32 0
  %11 = bitcast i64 %10 to <2 x i32>
  %12 = extractelement <2 x i32> %11, i32 0
  tail call void @llvm.x86.mmx.emms()
  ret i32 %12
}

define i32 @test2(i32* nocapture readonly %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # BB#0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    pshufw $232, (%eax), %mm0 # mm0 = mem[0,2,2,3]
; X32-NEXT:    movq %mm0, (%esp)
; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT:    movd %xmm0, %eax
; X32-NEXT:    emms
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # BB#0: # %entry
; X64-NEXT:    pshufw $232, (%rdi), %mm0 # mm0 = mem[0,2,2,3]
; X64-NEXT:    movd %mm0, %eax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %0 = bitcast i32* %ptr to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 -24)
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  %6 = bitcast i64 %5 to <2 x i32>
  %7 = extractelement <2 x i32> %6, i32 0
  tail call void @llvm.x86.mmx.emms()
  ret i32 %7
}

define i32 @test3(x86_mmx %a) nounwind {
; X32-LABEL: test3:
; X32:       # BB#0:
; X32-NEXT:    movd %mm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # BB#0:
; X64-NEXT:    movd %mm0, %eax
; X64-NEXT:    retq
  %tmp0 = bitcast x86_mmx %a to <2 x i32>
  %tmp1 = extractelement <2 x i32> %tmp0, i32 0
  ret i32 %tmp1
}

; Verify we don't muck with extractelts from the upper lane.
define i32 @test4(x86_mmx %a) nounwind {
; X32-LABEL: test4:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $8, %esp
; X32-NEXT:    movq %mm0, (%esp)
; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,0,1]
; X32-NEXT:    movd %xmm0, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # BB#0:
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,0,1]
; X64-NEXT:    movd %xmm0, %eax
; X64-NEXT:    retq
  %tmp0 = bitcast x86_mmx %a to <2 x i32>
  %tmp1 = extractelement <2 x i32> %tmp0, i32 1
  ret i32 %tmp1
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
declare void @llvm.x86.mmx.emms()