; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE

; Every function in this file bitcasts a 64-bit scalar (double or i64) to a
; 64-bit vector type, performs a single vector add/fadd, and bitcasts the
; result back to the scalar type. The expected instruction sequences are
; verified twice: once for the first llc invocation above (default check
; prefix) and once for the invocation that passes
; -x86-experimental-vector-widening-legalization (CHECK-WIDE prefix).


; <2 x i32> add of a double argument and a constant vector.
define double @test1(double %A) {
  %1 = bitcast double %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test1 into a
; single paddd instruction. At the moment we produce the sequence
; pshufd+paddd+pshufd. This is fixed with the widening legalization.
;
; CHECK-LABEL: test1
; CHECK-NOT: movsd
; CHECK: pshufd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test1
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE: paddd
; CHECK-WIDE-NEXT: ret


; <2 x i32> add of two double arguments.
define double @test2(double %A, double %B) {
  %1 = bitcast double %A to <2 x i32>
  %2 = bitcast double %B to <2 x i32>
  %add = add <2 x i32> %1, %2
  %3 = bitcast <2 x i32> %add to double
  ret double %3
}
; CHECK-LABEL: test2
; CHECK-NOT: movsd
; CHECK: paddd
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test2
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE: paddd
; CHECK-WIDE-NEXT: ret


; <2 x float> fadd of an i64 argument and a constant vector.
define i64 @test3(i64 %A) {
  %1 = bitcast i64 %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to i64
  ret i64 %2
}
; CHECK-LABEL: test3
; CHECK-NOT: pshufd
; CHECK: addps
; CHECK-NOT: pshufd
; CHECK: ret
;
; CHECK-WIDE-LABEL: test3
; CHECK-WIDE-NOT: pshufd
; CHECK-WIDE: addps
; CHECK-WIDE-NOT: pshufd
; CHECK-WIDE: ret


; <2 x i32> add of an i64 argument and a constant vector.
define i64 @test4(i64 %A) {
  %1 = bitcast i64 %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to i64
  ret i64 %2
}
; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
; Ideally, we should fold that sequence into a single paddd. This is fixed with
; the widening legalization.
;
; CHECK-LABEL: test4
; CHECK: pshufd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK: ret
;
; CHECK-WIDE-LABEL: test4
; CHECK-WIDE: movd %{{rdi|rcx}},
; CHECK-WIDE-NEXT: paddd
; CHECK-WIDE-NEXT: movd {{.*}}, %rax
; CHECK-WIDE: ret


; <2 x float> fadd of a double argument and a constant vector.
define double @test5(double %A) {
  %1 = bitcast double %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to double
  ret double %2
}
; CHECK-LABEL: test5
; CHECK: addps
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test5
; CHECK-WIDE: addps
; CHECK-WIDE-NEXT: ret


; <4 x i16> add of a double argument and a constant vector.
define double @test6(double %A) {
  %1 = bitcast double %A to <4 x i16>
  %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
  %2 = bitcast <4 x i16> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test6 into a
; single paddw instruction. At the moment we produce the sequence
; punpcklwd+paddw+pshufb. This is fixed with the widening legalization.
;
; CHECK-LABEL: test6
; CHECK-NOT: movsd
; CHECK: punpcklwd
; CHECK-NEXT: paddw
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test6
; CHECK-WIDE-NOT: mov
; CHECK-WIDE-NOT: punpcklwd
; CHECK-WIDE: paddw
; CHECK-WIDE-NEXT: ret


; <4 x i16> add of two double arguments.
define double @test7(double %A, double %B) {
  %1 = bitcast double %A to <4 x i16>
  %2 = bitcast double %B to <4 x i16>
  %add = add <4 x i16> %1, %2
  %3 = bitcast <4 x i16> %add to double
  ret double %3
}
; CHECK-LABEL: test7
; CHECK-NOT: movsd
; CHECK-NOT: punpcklwd
; CHECK: paddw
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test7
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklwd
; CHECK-WIDE: paddw
; CHECK-WIDE-NEXT: ret


; <8 x i8> add of a double argument and a constant vector.
define double @test8(double %A) {
  %1 = bitcast double %A to <8 x i8>
  %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
  %2 = bitcast <8 x i8> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test8 into a
; single paddb instruction. At the moment we produce the sequence
; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
;
; CHECK-LABEL: test8
; CHECK-NOT: movsd
; CHECK: punpcklbw
; CHECK-NEXT: paddb
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test8
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklbw
; CHECK-WIDE: paddb
; CHECK-WIDE-NEXT: ret


; <8 x i8> add of two double arguments.
define double @test9(double %A, double %B) {
  %1 = bitcast double %A to <8 x i8>
  %2 = bitcast double %B to <8 x i8>
  %add = add <8 x i8> %1, %2
  %3 = bitcast <8 x i8> %add to double
  ret double %3
}
; CHECK-LABEL: test9
; CHECK-NOT: movsd
; CHECK-NOT: punpcklbw
; CHECK: paddb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test9
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklbw
; CHECK-WIDE: paddb
; CHECK-WIDE-NEXT: ret