; Tests for SSE2 and below, without SSE3+.
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s

; NOTE(review): this file uses the legacy typed-pointer load/getelementptr
; syntax (e.g. "load <2 x double>* %A"). Presumably it is matched to the llc
; version in this tree -- confirm before modernizing any instruction.

; test1: replace element 0 of the loaded vector with the scalar %B
; (mask <2, 1>). Expected output merges the stack argument with movlpd.
define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
  %tmp3 = load <2 x double>* %A, align 16
  %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
  store <2 x double> %tmp9, <2 x double>* %r, align 16
  ret void

; CHECK-LABEL: test1:
; CHECK: movl 4(%esp), %eax
; CHECK-NEXT: movl 8(%esp), %ecx
; CHECK-NEXT: movapd (%ecx), %xmm0
; CHECK-NEXT: movlpd 12(%esp), %xmm0
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}

; test2: replace element 1 of the loaded vector with the scalar %B
; (mask <0, 2>). Expected output merges the stack argument with movhpd.
define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
  %tmp3 = load <2 x double>* %A, align 16
  %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
  store <2 x double> %tmp9, <2 x double>* %r, align 16
  ret void

; CHECK-LABEL: test2:
; CHECK: movl 4(%esp), %eax
; CHECK: movl 8(%esp), %ecx
; CHECK-NEXT: movapd (%ecx), %xmm0
; CHECK-NEXT: movhpd 12(%esp), %xmm0
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}


; test3: build <A[0], B[0], A[1], B[1]> one element at a time.
; Expected output contains unpcklps.
define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp = load <4 x float>* %B
  %tmp3 = load <4 x float>* %A
  %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0
  %tmp7 = extractelement <4 x float> %tmp, i32 0
  %tmp8 = extractelement <4 x float> %tmp3, i32 1
  %tmp9 = extractelement <4 x float> %tmp, i32 1
  %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0
  %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
  %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
  %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
  store <4 x float> %tmp13, <4 x float>* %res
  ret void
; CHECK-LABEL: test3:
; CHECK: unpcklps
}

; test4: shuffle %X against an undef second operand with mask <2, 6, 3, 7>.
; Expected output is a single-register pshufd with immediate 50.
define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
  %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >
  store <4 x float> %tmp5, <4 x float>* %res
  ret void
; CHECK-LABEL: test4:
; CHECK: pshufd $50, %xmm0, %xmm0
}

; test5: load one float into lane 0 of an otherwise zero vector, interleave
; its bytes with zero, then interleave the resulting words with zero.
define <4 x i32> @test5(i8** %ptr) nounwind {
; CHECK-LABEL: test5:
; CHECK: pxor
; CHECK: punpcklbw
; CHECK: punpcklwd

  %tmp = load i8** %ptr
  %tmp.upgrd.1 = bitcast i8* %tmp to float*
  %tmp.upgrd.2 = load float* %tmp.upgrd.1
  %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0
  %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1
  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2
  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3
  %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8>
  %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >
  %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16>
  %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >
  %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32>
  ret <4 x i32> %tmp36
}

; test6: mask <0, 5, 6, 7> takes lane 0 from the loaded vector and the other
; lanes from undef. Expected output is an aligned load and an aligned store.
define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
  %tmp1 = load <4 x float>* %A
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >
  store <4 x float> %tmp2, <4 x float>* %res
  ret void

; CHECK-LABEL: test6:
; CHECK: movaps (%ecx), %xmm0
; CHECK: movaps %xmm0, (%eax)
}

; test7: splat lane 0 of an all-zero vector and store it to address 0.
; The two unnamed instructions are implicitly numbered %1 and %2.
define void @test7() nounwind {
  bitcast <4 x i32> zeroinitializer to <4 x float>                                      ; yields %1
  shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; yields %2
  store <4 x float> %2, <4 x float>* null
  ret void

; CHECK-LABEL: test7:
; CHECK: xorps %xmm0, %xmm0
; CHECK: movaps %xmm0, 0
}

@x = external global [4 x i32]

; test8: assemble a vector from the four consecutive i32 elements of @x.
; Expected output is a single unaligned vector load.
define <2 x i64> @test8() nounwind {
  %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)
  %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1)
  %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2)
  %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3)
  %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0
  %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1
  %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2
  %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3
  %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>
  ret <2 x i64> %tmp16
; CHECK-LABEL: test8:
; CHECK: movups (%eax), %xmm0
}

; test9: build a vector from four float arguments that follow an i32 argument.
; Expected output is one unaligned vector load from 8(%esp).
define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
  %tmp = insertelement <4 x float> undef, float %a, i32 0
  %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1
  %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2
  %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3
  ret <4 x float> %tmp13
; CHECK-LABEL: test9:
; CHECK: movups 8(%esp), %xmm0
}

; test10: same as test9 without the leading i32 argument.
; Expected output is one aligned vector load from 4(%esp).
define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
  %tmp = insertelement <4 x float> undef, float %a, i32 0
  %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1
  %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2
  %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3
  ret <4 x float> %tmp13
; CHECK-LABEL: test10:
; CHECK: movaps 4(%esp), %xmm0
}

; test11: build a vector from two double arguments.
; Expected output is one aligned vector load from 4(%esp).
define <2 x double> @test11(double %a, double %b) nounwind {
  %tmp = insertelement <2 x double> undef, double %a, i32 0
  %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1
  ret <2 x double> %tmp7
; CHECK-LABEL: test12_placeholder_do_not_match
; CHECK-LABEL: test11:
; CHECK: movaps 4(%esp), %xmm0
}

; test12: add two shuffles of the same loaded vector, one against a splat of
; 1.0 and one against zero. Expected output contains movhlps then shufps.
define void @test12() nounwind {
  %tmp1 = load <4 x float>* null
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >
  %tmp4 = fadd <4 x float> %tmp2, %tmp3
  store <4 x float> %tmp4, <4 x float>* null
  ret void
; CHECK-LABEL: test12:
; CHECK: movhlps
; CHECK: shufps
}

; test13: two-input shuffle with mask <1, 4, 1, 5>; %A is unused.
; Expected output is a shufps with a memory operand followed by a pshufd.
define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
  %tmp3 = load <4 x float>* %B
  %tmp5 = load <4 x float>* %C
  %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >
  store <4 x float> %tmp11, <4 x float>* %res
  ret void
; CHECK-LABEL: test13:
; CHECK: shufps $69, (%ecx), %xmm0
; CHECK: pshufd $-40, %xmm0, %xmm0
}

; test14: combine the low halves of (x + y) and (x - y).
; Expected output is addps, subps, then movlhps on the captured registers.
define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
  %tmp = load <4 x float>* %y
  %tmp5 = load <4 x float>* %x
  %tmp9 = fadd <4 x float> %tmp5, %tmp
  %tmp21 = fsub <4 x float> %tmp5, %tmp
  %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >
  ret <4 x float> %tmp27
; CHECK-LABEL: test14:
; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
; CHECK: movlhps [[X2]], [[X0]]
}

; test15: combine the high halves of the two loaded vectors
; (mask <2, 3, 6, 7>). Expected output is movhlps.
define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
entry:
  %tmp = load <4 x float>* %y
  %tmp3 = load <4 x float>* %x
  %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >
  ret <4 x float> %tmp4
; CHECK-LABEL: test15:
; CHECK: movhlps %xmm1, %xmm0
}

; PR8900
; test16: extract elements 0 and 2 of a <4 x double> into a <2 x double>.
; CHECK-LABEL: test16:
; CHECK: unpcklpd
; CHECK: ret

define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
  %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3
  %i6 = load <4 x double>* %i5, align 32
  %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %i7
}

; PR9009
; test17 has no output expectations; presumably it only has to compile
; without crashing -- confirm against the bug report.
define fastcc void @test17() nounwind {
entry:
  %0 = insertelement <4 x i32> undef, i32 undef, i32 1
  %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %2 = bitcast <4 x i32> %1 to <4 x float>
  store <4 x float> %2, <4 x float> * undef
  ret void
}

; PR9210
; f truncates its unnamed <4 x double> argument (%0) to <4 x float>. It has no
; output expectations; presumably it only has to compile -- confirm against
; the bug report.
define <4 x float> @f(<4 x double>) nounwind {
entry:
  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
  ret <4 x float> %double2float.i
}

; test_insert_64_zext: keep element 0 of %i and place a zero in element 1.
; Expected output uses movq and must not contain an xor.
define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
; CHECK-LABEL: test_insert_64_zext:
; CHECK-NOT: xor
; CHECK: movq
  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %1
}

; PR19721: clear the low 32 bits of the vector through an i128 "and".
; Expected output contains punpckldq.
define <4 x i32> @PR19721(<4 x i32> %i) {
  %bc = bitcast <4 x i32> %i to i128
  %insert = and i128 %bc, -4294967296
  %bc2 = bitcast i128 %insert to <4 x i32>
  ret <4 x i32> %bc2

; CHECK-LABEL: PR19721:
; CHECK: punpckldq
}