• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
2; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
3; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
4
5target triple = "x86_64-unknown-unknown"
6
7; Ensure that the backend no longer emits unnecessary vector insert
8; instructions immediately after SSE scalar fp instructions
9; like addss or mulss.
10
; Lane 0 only: a[0] + b[0], reinserted into lane 0 of %a (extract / fadd / insert).
; The checks expect just addss (vaddss with AVX) before the return.
11define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
12; SSE-LABEL: test_add_ss:
13; SSE:       # BB#0:
14; SSE-NEXT:    addss %xmm1, %xmm0
15; SSE-NEXT:    retq
16;
17; AVX-LABEL: test_add_ss:
18; AVX:       # BB#0:
19; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
20; AVX-NEXT:    retq
21  %1 = extractelement <4 x float> %b, i32 0
22  %2 = extractelement <4 x float> %a, i32 0
23  %add = fadd float %2, %1
24  %3 = insertelement <4 x float> %a, float %add, i32 0
25  ret <4 x float> %3
26}
27
; Lane 0 only: a[0] - b[0], reinserted into lane 0 of %a (extract / fsub / insert).
; The checks expect just subss (vsubss with AVX) before the return.
28define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
29; SSE-LABEL: test_sub_ss:
30; SSE:       # BB#0:
31; SSE-NEXT:    subss %xmm1, %xmm0
32; SSE-NEXT:    retq
33;
34; AVX-LABEL: test_sub_ss:
35; AVX:       # BB#0:
36; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
37; AVX-NEXT:    retq
38  %1 = extractelement <4 x float> %b, i32 0
39  %2 = extractelement <4 x float> %a, i32 0
40  %sub = fsub float %2, %1
41  %3 = insertelement <4 x float> %a, float %sub, i32 0
42  ret <4 x float> %3
43}
44
; Lane 0 only: a[0] * b[0], reinserted into lane 0 of %a (extract / fmul / insert).
; The checks expect just mulss (vmulss with AVX) before the return.
45define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
46; SSE-LABEL: test_mul_ss:
47; SSE:       # BB#0:
48; SSE-NEXT:    mulss %xmm1, %xmm0
49; SSE-NEXT:    retq
50;
51; AVX-LABEL: test_mul_ss:
52; AVX:       # BB#0:
53; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
54; AVX-NEXT:    retq
55  %1 = extractelement <4 x float> %b, i32 0
56  %2 = extractelement <4 x float> %a, i32 0
57  %mul = fmul float %2, %1
58  %3 = insertelement <4 x float> %a, float %mul, i32 0
59  ret <4 x float> %3
60}
61
; Lane 0 only: a[0] / b[0], reinserted into lane 0 of %a (extract / fdiv / insert).
; The checks expect just divss (vdivss with AVX) before the return.
62define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
63; SSE-LABEL: test_div_ss:
64; SSE:       # BB#0:
65; SSE-NEXT:    divss %xmm1, %xmm0
66; SSE-NEXT:    retq
67;
68; AVX-LABEL: test_div_ss:
69; AVX:       # BB#0:
70; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
71; AVX-NEXT:    retq
72  %1 = extractelement <4 x float> %b, i32 0
73  %2 = extractelement <4 x float> %a, i32 0
74  %div = fdiv float %2, %1
75  %3 = insertelement <4 x float> %a, float %div, i32 0
76  ret <4 x float> %3
77}
78
; Double-precision variant: a[0] + b[0], reinserted into lane 0 of %a.
; The checks expect just addsd (vaddsd with AVX) before the return.
79define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
80; SSE-LABEL: test_add_sd:
81; SSE:       # BB#0:
82; SSE-NEXT:    addsd %xmm1, %xmm0
83; SSE-NEXT:    retq
84;
85; AVX-LABEL: test_add_sd:
86; AVX:       # BB#0:
87; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
88; AVX-NEXT:    retq
89  %1 = extractelement <2 x double> %b, i32 0
90  %2 = extractelement <2 x double> %a, i32 0
91  %add = fadd double %2, %1
92  %3 = insertelement <2 x double> %a, double %add, i32 0
93  ret <2 x double> %3
94}
95
; Double-precision variant: a[0] - b[0], reinserted into lane 0 of %a.
; The checks expect just subsd (vsubsd with AVX) before the return.
96define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
97; SSE-LABEL: test_sub_sd:
98; SSE:       # BB#0:
99; SSE-NEXT:    subsd %xmm1, %xmm0
100; SSE-NEXT:    retq
101;
102; AVX-LABEL: test_sub_sd:
103; AVX:       # BB#0:
104; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
105; AVX-NEXT:    retq
106  %1 = extractelement <2 x double> %b, i32 0
107  %2 = extractelement <2 x double> %a, i32 0
108  %sub = fsub double %2, %1
109  %3 = insertelement <2 x double> %a, double %sub, i32 0
110  ret <2 x double> %3
111}
112
; Double-precision variant: a[0] * b[0], reinserted into lane 0 of %a.
; The checks expect just mulsd (vmulsd with AVX) before the return.
113define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
114; SSE-LABEL: test_mul_sd:
115; SSE:       # BB#0:
116; SSE-NEXT:    mulsd %xmm1, %xmm0
117; SSE-NEXT:    retq
118;
119; AVX-LABEL: test_mul_sd:
120; AVX:       # BB#0:
121; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
122; AVX-NEXT:    retq
123  %1 = extractelement <2 x double> %b, i32 0
124  %2 = extractelement <2 x double> %a, i32 0
125  %mul = fmul double %2, %1
126  %3 = insertelement <2 x double> %a, double %mul, i32 0
127  ret <2 x double> %3
128}
129
; Double-precision variant: a[0] / b[0], reinserted into lane 0 of %a.
; The checks expect just divsd (vdivsd with AVX) before the return.
130define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
131; SSE-LABEL: test_div_sd:
132; SSE:       # BB#0:
133; SSE-NEXT:    divsd %xmm1, %xmm0
134; SSE-NEXT:    retq
135;
136; AVX-LABEL: test_div_sd:
137; AVX:       # BB#0:
138; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
139; AVX-NEXT:    retq
140  %1 = extractelement <2 x double> %b, i32 0
141  %2 = extractelement <2 x double> %a, i32 0
142  %div = fdiv double %2, %1
143  %3 = insertelement <2 x double> %a, double %div, i32 0
144  ret <2 x double> %3
145}
146
; Lane 0: a[0] + b[0], but reinserted into lane 0 of %b instead of %a.
; The SSE checks expect addss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vaddss.
147define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
148; SSE-LABEL: test2_add_ss:
149; SSE:       # BB#0:
150; SSE-NEXT:    addss %xmm0, %xmm1
151; SSE-NEXT:    movaps %xmm1, %xmm0
152; SSE-NEXT:    retq
153;
154; AVX-LABEL: test2_add_ss:
155; AVX:       # BB#0:
156; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
157; AVX-NEXT:    retq
158  %1 = extractelement <4 x float> %a, i32 0
159  %2 = extractelement <4 x float> %b, i32 0
160  %add = fadd float %1, %2
161  %3 = insertelement <4 x float> %b, float %add, i32 0
162  ret <4 x float> %3
163}
164
; Lane 0: b[0] - a[0], reinserted into lane 0 of %b.
; The SSE checks expect subss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vsubss.
165define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
166; SSE-LABEL: test2_sub_ss:
167; SSE:       # BB#0:
168; SSE-NEXT:    subss %xmm0, %xmm1
169; SSE-NEXT:    movaps %xmm1, %xmm0
170; SSE-NEXT:    retq
171;
172; AVX-LABEL: test2_sub_ss:
173; AVX:       # BB#0:
174; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
175; AVX-NEXT:    retq
176  %1 = extractelement <4 x float> %a, i32 0
177  %2 = extractelement <4 x float> %b, i32 0
178  %sub = fsub float %2, %1
179  %3 = insertelement <4 x float> %b, float %sub, i32 0
180  ret <4 x float> %3
181}
182
; Lane 0: a[0] * b[0], reinserted into lane 0 of %b instead of %a.
; The SSE checks expect mulss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vmulss.
183define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
184; SSE-LABEL: test2_mul_ss:
185; SSE:       # BB#0:
186; SSE-NEXT:    mulss %xmm0, %xmm1
187; SSE-NEXT:    movaps %xmm1, %xmm0
188; SSE-NEXT:    retq
189;
190; AVX-LABEL: test2_mul_ss:
191; AVX:       # BB#0:
192; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
193; AVX-NEXT:    retq
194  %1 = extractelement <4 x float> %a, i32 0
195  %2 = extractelement <4 x float> %b, i32 0
196  %mul = fmul float %1, %2
197  %3 = insertelement <4 x float> %b, float %mul, i32 0
198  ret <4 x float> %3
199}
200
; Lane 0: b[0] / a[0], reinserted into lane 0 of %b.
; The SSE checks expect divss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vdivss.
201define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
202; SSE-LABEL: test2_div_ss:
203; SSE:       # BB#0:
204; SSE-NEXT:    divss %xmm0, %xmm1
205; SSE-NEXT:    movaps %xmm1, %xmm0
206; SSE-NEXT:    retq
207;
208; AVX-LABEL: test2_div_ss:
209; AVX:       # BB#0:
210; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
211; AVX-NEXT:    retq
212  %1 = extractelement <4 x float> %a, i32 0
213  %2 = extractelement <4 x float> %b, i32 0
214  %div = fdiv float %2, %1
215  %3 = insertelement <4 x float> %b, float %div, i32 0
216  ret <4 x float> %3
217}
218
; Double-precision variant: a[0] + b[0], reinserted into lane 0 of %b.
; The SSE checks expect addsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vaddsd.
219define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
220; SSE-LABEL: test2_add_sd:
221; SSE:       # BB#0:
222; SSE-NEXT:    addsd %xmm0, %xmm1
223; SSE-NEXT:    movaps %xmm1, %xmm0
224; SSE-NEXT:    retq
225;
226; AVX-LABEL: test2_add_sd:
227; AVX:       # BB#0:
228; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
229; AVX-NEXT:    retq
230  %1 = extractelement <2 x double> %a, i32 0
231  %2 = extractelement <2 x double> %b, i32 0
232  %add = fadd double %1, %2
233  %3 = insertelement <2 x double> %b, double %add, i32 0
234  ret <2 x double> %3
235}
236
; Double-precision variant: b[0] - a[0], reinserted into lane 0 of %b.
; The SSE checks expect subsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vsubsd.
237define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
238; SSE-LABEL: test2_sub_sd:
239; SSE:       # BB#0:
240; SSE-NEXT:    subsd %xmm0, %xmm1
241; SSE-NEXT:    movaps %xmm1, %xmm0
242; SSE-NEXT:    retq
243;
244; AVX-LABEL: test2_sub_sd:
245; AVX:       # BB#0:
246; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
247; AVX-NEXT:    retq
248  %1 = extractelement <2 x double> %a, i32 0
249  %2 = extractelement <2 x double> %b, i32 0
250  %sub = fsub double %2, %1
251  %3 = insertelement <2 x double> %b, double %sub, i32 0
252  ret <2 x double> %3
253}
254
; Double-precision variant: a[0] * b[0], reinserted into lane 0 of %b.
; The SSE checks expect mulsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vmulsd.
255define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
256; SSE-LABEL: test2_mul_sd:
257; SSE:       # BB#0:
258; SSE-NEXT:    mulsd %xmm0, %xmm1
259; SSE-NEXT:    movaps %xmm1, %xmm0
260; SSE-NEXT:    retq
261;
262; AVX-LABEL: test2_mul_sd:
263; AVX:       # BB#0:
264; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
265; AVX-NEXT:    retq
266  %1 = extractelement <2 x double> %a, i32 0
267  %2 = extractelement <2 x double> %b, i32 0
268  %mul = fmul double %1, %2
269  %3 = insertelement <2 x double> %b, double %mul, i32 0
270  ret <2 x double> %3
271}
272
; Double-precision variant: b[0] / a[0], reinserted into lane 0 of %b.
; The SSE checks expect divsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vdivsd.
273define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
274; SSE-LABEL: test2_div_sd:
275; SSE:       # BB#0:
276; SSE-NEXT:    divsd %xmm0, %xmm1
277; SSE-NEXT:    movaps %xmm1, %xmm0
278; SSE-NEXT:    retq
279;
280; AVX-LABEL: test2_div_sd:
281; AVX:       # BB#0:
282; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
283; AVX-NEXT:    retq
284  %1 = extractelement <2 x double> %a, i32 0
285  %2 = extractelement <2 x double> %b, i32 0
286  %div = fdiv double %2, %1
287  %3 = insertelement <2 x double> %b, double %div, i32 0
288  ret <2 x double> %3
289}
290
; Two chained scalar adds on lane 0: a[0] + (a[0] + b[0]), reinserted into lane 0 of %a.
; The checks expect two addss (vaddss with AVX) and nothing else before the return.
291define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
292; SSE-LABEL: test_multiple_add_ss:
293; SSE:       # BB#0:
294; SSE-NEXT:    addss %xmm0, %xmm1
295; SSE-NEXT:    addss %xmm1, %xmm0
296; SSE-NEXT:    retq
297;
298; AVX-LABEL: test_multiple_add_ss:
299; AVX:       # BB#0:
300; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
301; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
302; AVX-NEXT:    retq
303  %1 = extractelement <4 x float> %b, i32 0
304  %2 = extractelement <4 x float> %a, i32 0
305  %add = fadd float %2, %1
306  %add2 = fadd float %2, %add
307  %3 = insertelement <4 x float> %a, float %add2, i32 0
308  ret <4 x float> %3
309}
310
; Two chained scalar subtracts on lane 0: a[0] - (a[0] - b[0]), reinserted into lane 0 of %a.
; The SSE checks expect a movaps copy of xmm0 into xmm2 followed by two subss; the AVX checks expect two vsubss.
311define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
312; SSE-LABEL: test_multiple_sub_ss:
313; SSE:       # BB#0:
314; SSE-NEXT:    movaps %xmm0, %xmm2
315; SSE-NEXT:    subss %xmm1, %xmm2
316; SSE-NEXT:    subss %xmm2, %xmm0
317; SSE-NEXT:    retq
318;
319; AVX-LABEL: test_multiple_sub_ss:
320; AVX:       # BB#0:
321; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
322; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
323; AVX-NEXT:    retq
324  %1 = extractelement <4 x float> %b, i32 0
325  %2 = extractelement <4 x float> %a, i32 0
326  %sub = fsub float %2, %1
327  %sub2 = fsub float %2, %sub
328  %3 = insertelement <4 x float> %a, float %sub2, i32 0
329  ret <4 x float> %3
330}
331
; Two chained scalar multiplies on lane 0: a[0] * (a[0] * b[0]), reinserted into lane 0 of %a.
; The checks expect two mulss (vmulss with AVX) and nothing else before the return.
332define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
333; SSE-LABEL: test_multiple_mul_ss:
334; SSE:       # BB#0:
335; SSE-NEXT:    mulss %xmm0, %xmm1
336; SSE-NEXT:    mulss %xmm1, %xmm0
337; SSE-NEXT:    retq
338;
339; AVX-LABEL: test_multiple_mul_ss:
340; AVX:       # BB#0:
341; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
342; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
343; AVX-NEXT:    retq
344  %1 = extractelement <4 x float> %b, i32 0
345  %2 = extractelement <4 x float> %a, i32 0
346  %mul = fmul float %2, %1
347  %mul2 = fmul float %2, %mul
348  %3 = insertelement <4 x float> %a, float %mul2, i32 0
349  ret <4 x float> %3
350}
351
; Two chained scalar divides on lane 0: a[0] / (a[0] / b[0]), reinserted into lane 0 of %a.
; The SSE checks expect a movaps copy of xmm0 into xmm2 followed by two divss; the AVX checks expect two vdivss.
352define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
353; SSE-LABEL: test_multiple_div_ss:
354; SSE:       # BB#0:
355; SSE-NEXT:    movaps %xmm0, %xmm2
356; SSE-NEXT:    divss %xmm1, %xmm2
357; SSE-NEXT:    divss %xmm2, %xmm0
358; SSE-NEXT:    retq
359;
360; AVX-LABEL: test_multiple_div_ss:
361; AVX:       # BB#0:
362; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
363; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
364; AVX-NEXT:    retq
365  %1 = extractelement <4 x float> %b, i32 0
366  %2 = extractelement <4 x float> %a, i32 0
367  %div = fdiv float %2, %1
368  %div2 = fdiv float %2, %div
369  %3 = insertelement <4 x float> %a, float %div2, i32 0
370  ret <4 x float> %3
371}
372
373; With SSE4.1 or greater, the shuffles in the following tests may
374; be lowered to X86Blendi nodes.
375
; Scalar %b + a[0] is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,5,6,7) takes lanes 1-3 from %a. The checks expect just addss (vaddss with AVX).
376define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
377; SSE-LABEL: blend_add_ss:
378; SSE:       # BB#0:
379; SSE-NEXT:    addss %xmm1, %xmm0
380; SSE-NEXT:    retq
381;
382; AVX-LABEL: blend_add_ss:
383; AVX:       # BB#0:
384; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
385; AVX-NEXT:    retq
386
387  %ext = extractelement <4 x float> %a, i32 0
388  %op = fadd float %b, %ext
389  %ins = insertelement <4 x float> undef, float %op, i32 0
390  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
391  ret <4 x float> %shuf
392}
393
; Scalar a[0] - %b is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,5,6,7) takes lanes 1-3 from %a. The checks expect just subss (vsubss with AVX).
394define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
395; SSE-LABEL: blend_sub_ss:
396; SSE:       # BB#0:
397; SSE-NEXT:    subss %xmm1, %xmm0
398; SSE-NEXT:    retq
399;
400; AVX-LABEL: blend_sub_ss:
401; AVX:       # BB#0:
402; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
403; AVX-NEXT:    retq
404
405  %ext = extractelement <4 x float> %a, i32 0
406  %op = fsub float %ext, %b
407  %ins = insertelement <4 x float> undef, float %op, i32 0
408  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
409  ret <4 x float> %shuf
410}
411
; Scalar %b * a[0] is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,5,6,7) takes lanes 1-3 from %a. The checks expect just mulss (vmulss with AVX).
412define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
413; SSE-LABEL: blend_mul_ss:
414; SSE:       # BB#0:
415; SSE-NEXT:    mulss %xmm1, %xmm0
416; SSE-NEXT:    retq
417;
418; AVX-LABEL: blend_mul_ss:
419; AVX:       # BB#0:
420; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
421; AVX-NEXT:    retq
422
423  %ext = extractelement <4 x float> %a, i32 0
424  %op = fmul float %b, %ext
425  %ins = insertelement <4 x float> undef, float %op, i32 0
426  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
427  ret <4 x float> %shuf
428}
429
; Scalar a[0] / %b is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,5,6,7) takes lanes 1-3 from %a. The checks expect just divss (vdivss with AVX).
430define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
431; SSE-LABEL: blend_div_ss:
432; SSE:       # BB#0:
433; SSE-NEXT:    divss %xmm1, %xmm0
434; SSE-NEXT:    retq
435;
436; AVX-LABEL: blend_div_ss:
437; AVX:       # BB#0:
438; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
439; AVX-NEXT:    retq
440
441  %ext = extractelement <4 x float> %a, i32 0
442  %op = fdiv float %ext, %b
443  %ins = insertelement <4 x float> undef, float %op, i32 0
444  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
445  ret <4 x float> %shuf
446}
447
; Scalar %b + a[0] is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,3) takes lane 1 from %a. The checks expect just addsd (vaddsd with AVX).
448define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
449; SSE-LABEL: blend_add_sd:
450; SSE:       # BB#0:
451; SSE-NEXT:    addsd %xmm1, %xmm0
452; SSE-NEXT:    retq
453;
454; AVX-LABEL: blend_add_sd:
455; AVX:       # BB#0:
456; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
457; AVX-NEXT:    retq
458
459  %ext = extractelement <2 x double> %a, i32 0
460  %op = fadd double %b, %ext
461  %ins = insertelement <2 x double> undef, double %op, i32 0
462  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
463  ret <2 x double> %shuf
464}
465
; Scalar a[0] - %b is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,3) takes lane 1 from %a. The checks expect just subsd (vsubsd with AVX).
466define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
467; SSE-LABEL: blend_sub_sd:
468; SSE:       # BB#0:
469; SSE-NEXT:    subsd %xmm1, %xmm0
470; SSE-NEXT:    retq
471;
472; AVX-LABEL: blend_sub_sd:
473; AVX:       # BB#0:
474; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
475; AVX-NEXT:    retq
476
477  %ext = extractelement <2 x double> %a, i32 0
478  %op = fsub double %ext, %b
479  %ins = insertelement <2 x double> undef, double %op, i32 0
480  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
481  ret <2 x double> %shuf
482}
483
; Scalar %b * a[0] is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,3) takes lane 1 from %a. The checks expect just mulsd (vmulsd with AVX).
484define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
485; SSE-LABEL: blend_mul_sd:
486; SSE:       # BB#0:
487; SSE-NEXT:    mulsd %xmm1, %xmm0
488; SSE-NEXT:    retq
489;
490; AVX-LABEL: blend_mul_sd:
491; AVX:       # BB#0:
492; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
493; AVX-NEXT:    retq
494
495  %ext = extractelement <2 x double> %a, i32 0
496  %op = fmul double %b, %ext
497  %ins = insertelement <2 x double> undef, double %op, i32 0
498  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
499  ret <2 x double> %shuf
500}
501
; Scalar a[0] / %b is placed in lane 0 of an undef vector, then the shufflevector
; (mask 0,3) takes lane 1 from %a. The checks expect just divsd (vdivsd with AVX).
502define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
503; SSE-LABEL: blend_div_sd:
504; SSE:       # BB#0:
505; SSE-NEXT:    divsd %xmm1, %xmm0
506; SSE-NEXT:    retq
507;
508; AVX-LABEL: blend_div_sd:
509; AVX:       # BB#0:
510; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
511; AVX-NEXT:    retq
512
513  %ext = extractelement <2 x double> %a, i32 0
514  %op = fdiv double %ext, %b
515  %ins = insertelement <2 x double> undef, double %op, i32 0
516  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
517  ret <2 x double> %shuf
518}
519
520; Ensure that the backend selects SSE/AVX scalar fp instructions
521; from a packed fp instruction plus a vector insert.
522
; Packed fadd of %a and %b; the shufflevector (mask 0,5,6,7) keeps only lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single addss (vaddss with AVX).
523define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
524; SSE-LABEL: insert_test_add_ss:
525; SSE:       # BB#0:
526; SSE-NEXT:    addss %xmm1, %xmm0
527; SSE-NEXT:    retq
528;
529; AVX-LABEL: insert_test_add_ss:
530; AVX:       # BB#0:
531; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
532; AVX-NEXT:    retq
533  %1 = fadd <4 x float> %a, %b
534  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
535  ret <4 x float> %2
536}
537
; Packed fsub of %a and %b; the shufflevector (mask 0,5,6,7) keeps only lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single subss (vsubss with AVX).
538define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
539; SSE-LABEL: insert_test_sub_ss:
540; SSE:       # BB#0:
541; SSE-NEXT:    subss %xmm1, %xmm0
542; SSE-NEXT:    retq
543;
544; AVX-LABEL: insert_test_sub_ss:
545; AVX:       # BB#0:
546; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
547; AVX-NEXT:    retq
548  %1 = fsub <4 x float> %a, %b
549  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
550  ret <4 x float> %2
551}
552
; Packed fmul of %a and %b; the shufflevector (mask 0,5,6,7) keeps only lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single mulss (vmulss with AVX).
553define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
554; SSE-LABEL: insert_test_mul_ss:
555; SSE:       # BB#0:
556; SSE-NEXT:    mulss %xmm1, %xmm0
557; SSE-NEXT:    retq
558;
559; AVX-LABEL: insert_test_mul_ss:
560; AVX:       # BB#0:
561; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
562; AVX-NEXT:    retq
563  %1 = fmul <4 x float> %a, %b
564  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
565  ret <4 x float> %2
566}
567
; Packed fdiv of %a by %b; the shufflevector (mask 0,5,6,7) keeps only lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single divss (vdivss with AVX).
568define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
569; SSE-LABEL: insert_test_div_ss:
570; SSE:       # BB#0:
571; SSE-NEXT:    divss %xmm1, %xmm0
572; SSE-NEXT:    retq
573;
574; AVX-LABEL: insert_test_div_ss:
575; AVX:       # BB#0:
576; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
577; AVX-NEXT:    retq
578  %1 = fdiv <4 x float> %a, %b
579  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
580  ret <4 x float> %2
581}
582
; Packed fadd of %a and %b; the shufflevector (mask 0,3) keeps only lane 0 of the
; result and takes lane 1 from %a. The checks expect a single addsd (vaddsd with AVX).
583define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
584; SSE-LABEL: insert_test_add_sd:
585; SSE:       # BB#0:
586; SSE-NEXT:    addsd %xmm1, %xmm0
587; SSE-NEXT:    retq
588;
589; AVX-LABEL: insert_test_add_sd:
590; AVX:       # BB#0:
591; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
592; AVX-NEXT:    retq
593  %1 = fadd <2 x double> %a, %b
594  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
595  ret <2 x double> %2
596}
597
; Packed fsub of %a and %b; the shufflevector (mask 0,3) keeps only lane 0 of the
; result and takes lane 1 from %a. The checks expect a single subsd (vsubsd with AVX).
598define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
599; SSE-LABEL: insert_test_sub_sd:
600; SSE:       # BB#0:
601; SSE-NEXT:    subsd %xmm1, %xmm0
602; SSE-NEXT:    retq
603;
604; AVX-LABEL: insert_test_sub_sd:
605; AVX:       # BB#0:
606; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
607; AVX-NEXT:    retq
608  %1 = fsub <2 x double> %a, %b
609  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
610  ret <2 x double> %2
611}
612
; Packed fmul of %a and %b; the shufflevector (mask 0,3) keeps only lane 0 of the
; result and takes lane 1 from %a. The checks expect a single mulsd (vmulsd with AVX).
613define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
614; SSE-LABEL: insert_test_mul_sd:
615; SSE:       # BB#0:
616; SSE-NEXT:    mulsd %xmm1, %xmm0
617; SSE-NEXT:    retq
618;
619; AVX-LABEL: insert_test_mul_sd:
620; AVX:       # BB#0:
621; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
622; AVX-NEXT:    retq
623  %1 = fmul <2 x double> %a, %b
624  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
625  ret <2 x double> %2
626}
627
; Packed fdiv of %a by %b; the shufflevector (mask 0,3) keeps only lane 0 of the
; result and takes lane 1 from %a. The checks expect a single divsd (vdivsd with AVX).
628define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
629; SSE-LABEL: insert_test_div_sd:
630; SSE:       # BB#0:
631; SSE-NEXT:    divsd %xmm1, %xmm0
632; SSE-NEXT:    retq
633;
634; AVX-LABEL: insert_test_div_sd:
635; AVX:       # BB#0:
636; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
637; AVX-NEXT:    retq
638  %1 = fdiv <2 x double> %a, %b
639  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
640  ret <2 x double> %2
641}
642
; Packed fadd of %b and %a; the shufflevector (mask 0,5,6,7) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect addss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vaddss.
643define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
644; SSE-LABEL: insert_test2_add_ss:
645; SSE:       # BB#0:
646; SSE-NEXT:    addss %xmm0, %xmm1
647; SSE-NEXT:    movaps %xmm1, %xmm0
648; SSE-NEXT:    retq
649;
650; AVX-LABEL: insert_test2_add_ss:
651; AVX:       # BB#0:
652; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
653; AVX-NEXT:    retq
654  %1 = fadd <4 x float> %b, %a
655  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
656  ret <4 x float> %2
657}
658
; Packed fsub of %b minus %a; the shufflevector (mask 0,5,6,7) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect subss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vsubss.
659define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
660; SSE-LABEL: insert_test2_sub_ss:
661; SSE:       # BB#0:
662; SSE-NEXT:    subss %xmm0, %xmm1
663; SSE-NEXT:    movaps %xmm1, %xmm0
664; SSE-NEXT:    retq
665;
666; AVX-LABEL: insert_test2_sub_ss:
667; AVX:       # BB#0:
668; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
669; AVX-NEXT:    retq
670  %1 = fsub <4 x float> %b, %a
671  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
672  ret <4 x float> %2
673}
674
; Packed fmul of %b and %a; the shufflevector (mask 0,5,6,7) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect mulss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vmulss.
675define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
676; SSE-LABEL: insert_test2_mul_ss:
677; SSE:       # BB#0:
678; SSE-NEXT:    mulss %xmm0, %xmm1
679; SSE-NEXT:    movaps %xmm1, %xmm0
680; SSE-NEXT:    retq
681;
682; AVX-LABEL: insert_test2_mul_ss:
683; AVX:       # BB#0:
684; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
685; AVX-NEXT:    retq
686  %1 = fmul <4 x float> %b, %a
687  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
688  ret <4 x float> %2
689}
690
; Packed fdiv of %b by %a; the shufflevector (mask 0,5,6,7) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect divss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vdivss.
691define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
692; SSE-LABEL: insert_test2_div_ss:
693; SSE:       # BB#0:
694; SSE-NEXT:    divss %xmm0, %xmm1
695; SSE-NEXT:    movaps %xmm1, %xmm0
696; SSE-NEXT:    retq
697;
698; AVX-LABEL: insert_test2_div_ss:
699; AVX:       # BB#0:
700; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
701; AVX-NEXT:    retq
702  %1 = fdiv <4 x float> %b, %a
703  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
704  ret <4 x float> %2
705}
706
; Packed fadd of %b and %a; the shufflevector (mask 0,3) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect addsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vaddsd.
707define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
708; SSE-LABEL: insert_test2_add_sd:
709; SSE:       # BB#0:
710; SSE-NEXT:    addsd %xmm0, %xmm1
711; SSE-NEXT:    movaps %xmm1, %xmm0
712; SSE-NEXT:    retq
713;
714; AVX-LABEL: insert_test2_add_sd:
715; AVX:       # BB#0:
716; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
717; AVX-NEXT:    retq
718  %1 = fadd <2 x double> %b, %a
719  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
720  ret <2 x double> %2
721}
722
; Packed fsub of %b minus %a; the shufflevector (mask 0,3) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect subsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vsubsd.
723define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
724; SSE-LABEL: insert_test2_sub_sd:
725; SSE:       # BB#0:
726; SSE-NEXT:    subsd %xmm0, %xmm1
727; SSE-NEXT:    movaps %xmm1, %xmm0
728; SSE-NEXT:    retq
729;
730; AVX-LABEL: insert_test2_sub_sd:
731; AVX:       # BB#0:
732; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
733; AVX-NEXT:    retq
734  %1 = fsub <2 x double> %b, %a
735  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
736  ret <2 x double> %2
737}
738
; Packed fmul of %b and %a; the shufflevector (mask 0,3) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect mulsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vmulsd.
739define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
740; SSE-LABEL: insert_test2_mul_sd:
741; SSE:       # BB#0:
742; SSE-NEXT:    mulsd %xmm0, %xmm1
743; SSE-NEXT:    movaps %xmm1, %xmm0
744; SSE-NEXT:    retq
745;
746; AVX-LABEL: insert_test2_mul_sd:
747; AVX:       # BB#0:
748; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
749; AVX-NEXT:    retq
750  %1 = fmul <2 x double> %b, %a
751  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
752  ret <2 x double> %2
753}
754
; Packed fdiv of %b by %a; the shufflevector (mask 0,3) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect divsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vdivsd.
755define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
756; SSE-LABEL: insert_test2_div_sd:
757; SSE:       # BB#0:
758; SSE-NEXT:    divsd %xmm0, %xmm1
759; SSE-NEXT:    movaps %xmm1, %xmm0
760; SSE-NEXT:    retq
761;
762; AVX-LABEL: insert_test2_div_sd:
763; AVX:       # BB#0:
764; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
765; AVX-NEXT:    retq
766  %1 = fdiv <2 x double> %b, %a
767  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
768  ret <2 x double> %2
769}
770
; Packed fadd of %a and %b; the constant-mask select (false,true,true,true) keeps lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single addss (vaddss with AVX).
771define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
772; SSE-LABEL: insert_test3_add_ss:
773; SSE:       # BB#0:
774; SSE-NEXT:    addss %xmm1, %xmm0
775; SSE-NEXT:    retq
776;
777; AVX-LABEL: insert_test3_add_ss:
778; AVX:       # BB#0:
779; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
780; AVX-NEXT:    retq
781  %1 = fadd <4 x float> %a, %b
782  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
783  ret <4 x float> %2
784}
785
; Packed fsub of %a and %b; the constant-mask select (false,true,true,true) keeps lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single subss (vsubss with AVX).
786define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
787; SSE-LABEL: insert_test3_sub_ss:
788; SSE:       # BB#0:
789; SSE-NEXT:    subss %xmm1, %xmm0
790; SSE-NEXT:    retq
791;
792; AVX-LABEL: insert_test3_sub_ss:
793; AVX:       # BB#0:
794; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
795; AVX-NEXT:    retq
796  %1 = fsub <4 x float> %a, %b
797  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
798  ret <4 x float> %2
799}
800
; Packed fmul of %a and %b; the constant-mask select (false,true,true,true) keeps lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single mulss (vmulss with AVX).
801define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
802; SSE-LABEL: insert_test3_mul_ss:
803; SSE:       # BB#0:
804; SSE-NEXT:    mulss %xmm1, %xmm0
805; SSE-NEXT:    retq
806;
807; AVX-LABEL: insert_test3_mul_ss:
808; AVX:       # BB#0:
809; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
810; AVX-NEXT:    retq
811  %1 = fmul <4 x float> %a, %b
812  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
813  ret <4 x float> %2
814}
815
; Packed fdiv of %a by %b; the constant-mask select (false,true,true,true) keeps lane 0 of the
; result and takes lanes 1-3 from %a. The checks expect a single divss (vdivss with AVX).
816define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
817; SSE-LABEL: insert_test3_div_ss:
818; SSE:       # BB#0:
819; SSE-NEXT:    divss %xmm1, %xmm0
820; SSE-NEXT:    retq
821;
822; AVX-LABEL: insert_test3_div_ss:
823; AVX:       # BB#0:
824; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
825; AVX-NEXT:    retq
826  %1 = fdiv <4 x float> %a, %b
827  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
828  ret <4 x float> %2
829}
830
; Packed fadd of %a and %b; the constant-mask select (false,true) keeps lane 0 of the
; result and takes lane 1 from %a. The checks expect a single addsd (vaddsd with AVX).
831define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
832; SSE-LABEL: insert_test3_add_sd:
833; SSE:       # BB#0:
834; SSE-NEXT:    addsd %xmm1, %xmm0
835; SSE-NEXT:    retq
836;
837; AVX-LABEL: insert_test3_add_sd:
838; AVX:       # BB#0:
839; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
840; AVX-NEXT:    retq
841  %1 = fadd <2 x double> %a, %b
842  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
843  ret <2 x double> %2
844}
845
; Packed fsub of %a and %b; the constant-mask select (false,true) keeps lane 0 of the
; result and takes lane 1 from %a. The checks expect a single subsd (vsubsd with AVX).
846define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
847; SSE-LABEL: insert_test3_sub_sd:
848; SSE:       # BB#0:
849; SSE-NEXT:    subsd %xmm1, %xmm0
850; SSE-NEXT:    retq
851;
852; AVX-LABEL: insert_test3_sub_sd:
853; AVX:       # BB#0:
854; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
855; AVX-NEXT:    retq
856  %1 = fsub <2 x double> %a, %b
857  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
858  ret <2 x double> %2
859}
860
; Packed fmul of %a and %b; the constant-mask select (false,true) keeps lane 0 of the
; result and takes lane 1 from %a. The checks expect a single mulsd (vmulsd with AVX).
861define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
862; SSE-LABEL: insert_test3_mul_sd:
863; SSE:       # BB#0:
864; SSE-NEXT:    mulsd %xmm1, %xmm0
865; SSE-NEXT:    retq
866;
867; AVX-LABEL: insert_test3_mul_sd:
868; AVX:       # BB#0:
869; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
870; AVX-NEXT:    retq
871  %1 = fmul <2 x double> %a, %b
872  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
873  ret <2 x double> %2
874}
875
; Packed fdiv of %a by %b; the constant-mask select (false,true) keeps lane 0 of the
; result and takes lane 1 from %a. The checks expect a single divsd (vdivsd with AVX).
876define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
877; SSE-LABEL: insert_test3_div_sd:
878; SSE:       # BB#0:
879; SSE-NEXT:    divsd %xmm1, %xmm0
880; SSE-NEXT:    retq
881;
882; AVX-LABEL: insert_test3_div_sd:
883; AVX:       # BB#0:
884; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
885; AVX-NEXT:    retq
886  %1 = fdiv <2 x double> %a, %b
887  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
888  ret <2 x double> %2
889}
890
; Packed fadd of %b and %a; the constant-mask select (false,true,true,true) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect addss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vaddss.
891define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
892; SSE-LABEL: insert_test4_add_ss:
893; SSE:       # BB#0:
894; SSE-NEXT:    addss %xmm0, %xmm1
895; SSE-NEXT:    movaps %xmm1, %xmm0
896; SSE-NEXT:    retq
897;
898; AVX-LABEL: insert_test4_add_ss:
899; AVX:       # BB#0:
900; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
901; AVX-NEXT:    retq
902  %1 = fadd <4 x float> %b, %a
903  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
904  ret <4 x float> %2
905}
906
; Packed fsub of %b minus %a; the constant-mask select (false,true,true,true) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect subss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vsubss.
907define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
908; SSE-LABEL: insert_test4_sub_ss:
909; SSE:       # BB#0:
910; SSE-NEXT:    subss %xmm0, %xmm1
911; SSE-NEXT:    movaps %xmm1, %xmm0
912; SSE-NEXT:    retq
913;
914; AVX-LABEL: insert_test4_sub_ss:
915; AVX:       # BB#0:
916; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
917; AVX-NEXT:    retq
918  %1 = fsub <4 x float> %b, %a
919  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
920  ret <4 x float> %2
921}
922
; Packed fmul of %b and %a; the constant-mask select (false,true,true,true) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect mulss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vmulss.
923define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
924; SSE-LABEL: insert_test4_mul_ss:
925; SSE:       # BB#0:
926; SSE-NEXT:    mulss %xmm0, %xmm1
927; SSE-NEXT:    movaps %xmm1, %xmm0
928; SSE-NEXT:    retq
929;
930; AVX-LABEL: insert_test4_mul_ss:
931; AVX:       # BB#0:
932; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
933; AVX-NEXT:    retq
934  %1 = fmul <4 x float> %b, %a
935  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
936  ret <4 x float> %2
937}
938
; Packed fdiv of %b by %a; the constant-mask select (false,true,true,true) keeps lane 0 of the result and takes lanes 1-3 from %b.
; The SSE checks expect divss into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vdivss.
939define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
940; SSE-LABEL: insert_test4_div_ss:
941; SSE:       # BB#0:
942; SSE-NEXT:    divss %xmm0, %xmm1
943; SSE-NEXT:    movaps %xmm1, %xmm0
944; SSE-NEXT:    retq
945;
946; AVX-LABEL: insert_test4_div_ss:
947; AVX:       # BB#0:
948; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
949; AVX-NEXT:    retq
950  %1 = fdiv <4 x float> %b, %a
951  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
952  ret <4 x float> %2
953}
954
; Packed fadd of %b and %a; the constant-mask select (false,true) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect addsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vaddsd.
955define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
956; SSE-LABEL: insert_test4_add_sd:
957; SSE:       # BB#0:
958; SSE-NEXT:    addsd %xmm0, %xmm1
959; SSE-NEXT:    movaps %xmm1, %xmm0
960; SSE-NEXT:    retq
961;
962; AVX-LABEL: insert_test4_add_sd:
963; AVX:       # BB#0:
964; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
965; AVX-NEXT:    retq
966  %1 = fadd <2 x double> %b, %a
967  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
968  ret <2 x double> %2
969}
970
; Packed fsub of %b minus %a; the constant-mask select (false,true) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect subsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vsubsd.
971define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
972; SSE-LABEL: insert_test4_sub_sd:
973; SSE:       # BB#0:
974; SSE-NEXT:    subsd %xmm0, %xmm1
975; SSE-NEXT:    movaps %xmm1, %xmm0
976; SSE-NEXT:    retq
977;
978; AVX-LABEL: insert_test4_sub_sd:
979; AVX:       # BB#0:
980; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
981; AVX-NEXT:    retq
982  %1 = fsub <2 x double> %b, %a
983  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
984  ret <2 x double> %2
985}
986
; Packed fmul of %b and %a; the constant-mask select (false,true) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect mulsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vmulsd.
987define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
988; SSE-LABEL: insert_test4_mul_sd:
989; SSE:       # BB#0:
990; SSE-NEXT:    mulsd %xmm0, %xmm1
991; SSE-NEXT:    movaps %xmm1, %xmm0
992; SSE-NEXT:    retq
993;
994; AVX-LABEL: insert_test4_mul_sd:
995; AVX:       # BB#0:
996; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
997; AVX-NEXT:    retq
998  %1 = fmul <2 x double> %b, %a
999  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
1000  ret <2 x double> %2
1001}
1002
; Packed fdiv of %b by %a; the constant-mask select (false,true) keeps lane 0 of the result and takes lane 1 from %b.
; The SSE checks expect divsd into xmm1 plus a movaps copy to xmm0; the AVX checks expect only vdivsd.
1003define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
1004; SSE-LABEL: insert_test4_div_sd:
1005; SSE:       # BB#0:
1006; SSE-NEXT:    divsd %xmm0, %xmm1
1007; SSE-NEXT:    movaps %xmm1, %xmm0
1008; SSE-NEXT:    retq
1009;
1010; AVX-LABEL: insert_test4_div_sd:
1011; AVX:       # BB#0:
1012; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
1013; AVX-NEXT:    retq
1014  %1 = fdiv <2 x double> %b, %a
1015  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
1016  ret <2 x double> %2
1017}
1018