; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 | FileCheck %s

; Every function in this file bitcasts a 64-bit scalar (double or i64) to a
; 64-bit vector type, performs one vector add/fadd, and bitcasts the result
; back to the scalar type.

; @test1 adds the constant <3, 5> to a double reinterpreted as <2 x i32>.
; The assertions below expect the entire body to fold into a single paddd with
; a %rip-relative constant operand. This comment was previously a FIXME that
; described a pshufd+paddq+pshufd sequence and noted that it is fixed with the
; widening legalization; the assertions no longer show that sequence.

define double @test1(double %A) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %1 = bitcast double %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to double
  ret double %2
}

; @test2 adds two doubles that are both reinterpreted as <2 x i32>; the
; assertions expect a single register-to-register paddd.

define double @test2(double %A, double %B) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: paddd %xmm1, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast double %A to <2 x i32>
  %2 = bitcast double %B to <2 x i32>
  %add = add <2 x i32> %1, %2
  %3 = bitcast <2 x i32> %add to double
  ret double %3
}

; @test3 adds the constant <3.0, 5.0> to an i64 reinterpreted as <2 x float>.
; The assertions expect a movq from %rdi into %xmm0, one addps with a
; %rip-relative constant operand, and a movq from %xmm0 back to %rax.

define i64 @test3(i64 %A) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %xmm0
; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: retq
  %1 = bitcast i64 %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to i64
  ret i64 %2
}

; The vector add in @test4 is expected to lower to a single paddd; the
; assertions additionally expect a movq on each side to move the i64 between
; %rdi/%rax and %xmm0. This comment was previously a FIXME noting that the
; issue is fixed with the widening legalization.

; @test4 adds the constant <3, 5> to an i64 reinterpreted as <2 x i32>.
define i64 @test4(i64 %A) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %xmm0
; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: retq
  %1 = bitcast i64 %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to i64
  ret i64 %2
}

; @test5 adds the constant <3.0, 5.0> to a double reinterpreted as
; <2 x float>; the assertions expect a single addps with a %rip-relative
; constant operand.

define double @test5(double %A) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %1 = bitcast double %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to double
  ret double %2
}

; @test6 adds the constant <3, 4, 5, 6> to a double reinterpreted as
; <4 x i16>. The assertions below expect the entire body to fold into a single
; paddw with a %rip-relative constant operand. This comment was previously a
; FIXME noting that the issue is fixed with the widening legalization.

define double @test6(double %A) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: paddw {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %1 = bitcast double %A to <4 x i16>
  %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
  %2 = bitcast <4 x i16> %add to double
  ret double %2
}

; @test7 adds two doubles that are both reinterpreted as <4 x i16>; the
; assertions expect a single register-to-register paddw.

define double @test7(double %A, double %B) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: paddw %xmm1, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast double %A to <4 x i16>
  %2 = bitcast double %B to <4 x i16>
  %add = add <4 x i16> %1, %2
  %3 = bitcast <4 x i16> %add to double
  ret double %3
}

; The entire body of @test8 is expected to fold into a single paddb
; instruction. This comment was previously a FIXME that described a
; pshufd+paddw+pshufd sequence and noted that it is fixed with the widening
; legalization; the assertions for @test8 no longer show that sequence.

; @test8 adds the constant <3, 4, 5, 6, 7, 8, 9, 10> to a double reinterpreted
; as <8 x i8>; the assertions expect a single paddb with a %rip-relative
; constant operand.
define double @test8(double %A) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
; CHECK-NEXT: paddb {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %1 = bitcast double %A to <8 x i8>
  %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
  %2 = bitcast <8 x i8> %add to double
  ret double %2
}

; @test9 adds two doubles that are both reinterpreted as <8 x i8>; the
; assertions expect a single register-to-register paddb.

define double @test9(double %A, double %B) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
; CHECK-NEXT: paddb %xmm1, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast double %A to <8 x i8>
  %2 = bitcast double %B to <8 x i8>
  %add = add <8 x i8> %1, %2
  %3 = bitcast <8 x i8> %add to double
  ret double %3
}