; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32

; Each invocation above compiles this file with llc for one target
; (x86-64 Linux, x86-64 Windows, i386 Linux) and pipes the assembly to
; FileCheck under a per-target check prefix. Every function below lists the
; exact instruction sequence expected for each of the three targets.

; PR7518
; Adds the two lanes of the <2 x float> argument %Q and stores the scalar
; result through %P2.
define void @test1(<2 x float> %Q, float* %P2) nounwind {
; X64: test1:
; X64-NEXT: pshufd $1, %xmm0, %xmm1
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: movss %xmm1, (%rdi)
; X64-NEXT: ret

; W64: test1:
; W64-NEXT: movdqa (%rcx), %xmm0
; W64-NEXT: pshufd $1, %xmm0, %xmm1
; W64-NEXT: addss %xmm0, %xmm1
; W64-NEXT: movss %xmm1, (%rdx)
; W64-NEXT: ret

; X32: test1:
; X32-NEXT: pshufd $1, %xmm0, %xmm1
; X32-NEXT: addss %xmm0, %xmm1
; X32-NEXT: movl 4(%esp), %eax
; X32-NEXT: movss %xmm1, (%eax)
; X32-NEXT: ret
  %lane0 = extractelement <2 x float> %Q, i32 0
  %lane1 = extractelement <2 x float> %Q, i32 1
  %sum = fadd float %lane0, %lane1

  store float %sum, float* %P2
  ret void
}


; Element-wise add of two <2 x float> arguments. The pointer argument %P is
; never used by the body.
define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float>* %P) nounwind {
; X64: test2:
; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: ret

; W64: test2:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps (%rdx), %xmm0
; W64-NEXT: ret

; X32: test2:
; X32: addps %xmm1, %xmm0
  %sum = fadd <2 x float> %Q, %R
  ret <2 x float> %sum
}


; Takes lanes 0 and 1 of a <4 x float> argument and adds that <2 x float>
; value to itself.
define <2 x float> @test3(<4 x float> %A) nounwind {
; X64: test3:
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: ret

; W64: test3:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: ret

; X32: test3:
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: ret
  %low = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %doubled = fadd <2 x float> %low, %low
  ret <2 x float> %doubled
}

; Adds a <2 x float> argument to itself.
define <2 x float> @test4(<2 x float> %A) nounwind {
; X64: test4:
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: ret

; W64: test4:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: ret

; X32: test4:
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: ret
  %doubled = fadd <2 x float> %A, %A
  ret <2 x float> %doubled
}

; Doubles the low two lanes of a <4 x float> argument in the entry block,
; doubles the result again in a second basic block, then widens it back to
; <4 x float> with the upper two lanes undef.
define <4 x float> @test5(<4 x float> %A) nounwind {
; X64: test5:
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: ret

; W64: test5:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: ret

; X32: test5:
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: ret
  %low = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %doubled = fadd <2 x float> %low, %low
  br label %cont

cont:
  %quadrupled = fadd <2 x float> %doubled, %doubled
  %widened = shufflevector <2 x float> %quadrupled, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x float> %widened
}