; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sqrt_ss(<4 x float> %a) {
; SSE2-LABEL: test_sqrt_ss:
; SSE2:       # BB#0:
; SSE2-NEXT:   sqrtss %xmm0, %xmm1
; SSE2-NEXT:   movss %xmm1, %xmm0
; SSE2-NEXT:   retq
;
; SSE41-LABEL: test_sqrt_ss:
; SSE41:       # BB#0:
; SSE41-NEXT:  sqrtss %xmm0, %xmm1
; SSE41-NEXT:  blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:  retq
;
; AVX-LABEL: test_sqrt_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = call float @llvm.sqrt.f32(float %1)
  %3 = insertelement <4 x float> %a, float %2, i32 0
  ret <4 x float> %3
}
declare float @llvm.sqrt.f32(float)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sqrt_sd(<2 x double> %a) {
; SSE-LABEL: test_sqrt_sd:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm1
; SSE-NEXT:    movsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sqrt_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = call double @llvm.sqrt.f64(double %1)
  %3 = insertelement <2 x double> %a, double %2, i32 0
  ret <2 x double> %3
}
declare double @llvm.sqrt.f64(double)

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    subss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    divss %xmm1, %xmm2
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.

define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}