• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
9
10target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11target triple = "x86_64-unknown-unknown"
12
; Mask <0, 0>: both result lanes take element 0 of %a; %b is not referenced.
13define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
14; SSE-LABEL: shuffle_v2i64_00:
15; SSE:       # BB#0:
16; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
17; SSE-NEXT:    retq
18;
19; AVX1-LABEL: shuffle_v2i64_00:
20; AVX1:       # BB#0:
21; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
22; AVX1-NEXT:    retq
23;
24; AVX2-LABEL: shuffle_v2i64_00:
25; AVX2:       # BB#0:
26; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
27; AVX2-NEXT:    retq
28;
29; AVX512VL-LABEL: shuffle_v2i64_00:
30; AVX512VL:       # BB#0:
31; AVX512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
32; AVX512VL-NEXT:    retq
33  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
34  ret <2 x i64> %shuffle
35}
; Mask <1, 0>: swaps the two elements of %a; %b is not referenced.
36define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
37; SSE-LABEL: shuffle_v2i64_10:
38; SSE:       # BB#0:
39; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
40; SSE-NEXT:    retq
41;
42; AVX-LABEL: shuffle_v2i64_10:
43; AVX:       # BB#0:
44; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
45; AVX-NEXT:    retq
46  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
47  ret <2 x i64> %shuffle
48}
; Mask <1, 1>: both result lanes take element 1 of %a; %b is not referenced.
49define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
50; SSE-LABEL: shuffle_v2i64_11:
51; SSE:       # BB#0:
52; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
53; SSE-NEXT:    retq
54;
55; AVX-LABEL: shuffle_v2i64_11:
56; AVX:       # BB#0:
57; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
58; AVX-NEXT:    retq
59  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
60  ret <2 x i64> %shuffle
61}
; Mask <2, 2>: both result lanes take element 0 of %b; %a is not referenced.
62define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
63; SSE-LABEL: shuffle_v2i64_22:
64; SSE:       # BB#0:
65; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
66; SSE-NEXT:    retq
67;
68; AVX1-LABEL: shuffle_v2i64_22:
69; AVX1:       # BB#0:
70; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
71; AVX1-NEXT:    retq
72;
73; AVX2-LABEL: shuffle_v2i64_22:
74; AVX2:       # BB#0:
75; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
76; AVX2-NEXT:    retq
77;
78; AVX512VL-LABEL: shuffle_v2i64_22:
79; AVX512VL:       # BB#0:
80; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm0
81; AVX512VL-NEXT:    retq
82  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
83  ret <2 x i64> %shuffle
84}
; Mask <3, 2>: swaps the two elements of %b; %a is not referenced.
85define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
86; SSE-LABEL: shuffle_v2i64_32:
87; SSE:       # BB#0:
88; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
89; SSE-NEXT:    retq
90;
91; AVX-LABEL: shuffle_v2i64_32:
92; AVX:       # BB#0:
93; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
94; AVX-NEXT:    retq
95  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
96  ret <2 x i64> %shuffle
97}
; Mask <3, 3>: both result lanes take element 1 of %b; %a is not referenced.
98define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
99; SSE-LABEL: shuffle_v2i64_33:
100; SSE:       # BB#0:
101; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
102; SSE-NEXT:    retq
103;
104; AVX-LABEL: shuffle_v2i64_33:
105; AVX:       # BB#0:
106; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
107; AVX-NEXT:    retq
108  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
109  ret <2 x i64> %shuffle
110}
111
; Double variant of the <0, 0> mask: both result lanes take element 0 of %a.
; The SSE2 run expects movlhps, the later SSE levels movddup.
112define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
113; SSE2-LABEL: shuffle_v2f64_00:
114; SSE2:       # BB#0:
115; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
116; SSE2-NEXT:    retq
117;
118; SSE3-LABEL: shuffle_v2f64_00:
119; SSE3:       # BB#0:
120; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
121; SSE3-NEXT:    retq
122;
123; SSSE3-LABEL: shuffle_v2f64_00:
124; SSSE3:       # BB#0:
125; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
126; SSSE3-NEXT:    retq
127;
128; SSE41-LABEL: shuffle_v2f64_00:
129; SSE41:       # BB#0:
130; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
131; SSE41-NEXT:    retq
132;
133; AVX-LABEL: shuffle_v2f64_00:
134; AVX:       # BB#0:
135; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
136; AVX-NEXT:    retq
137  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
138  ret <2 x double> %shuffle
139}
; Double variant of the <1, 0> mask: swaps the two elements of %a.
140define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
141; SSE-LABEL: shuffle_v2f64_10:
142; SSE:       # BB#0:
143; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
144; SSE-NEXT:    retq
145;
146; AVX-LABEL: shuffle_v2f64_10:
147; AVX:       # BB#0:
148; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
149; AVX-NEXT:    retq
150
151  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
152  ret <2 x double> %shuffle
153}
; Double variant of the <1, 1> mask: both result lanes take element 1 of %a.
154define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
155; SSE-LABEL: shuffle_v2f64_11:
156; SSE:       # BB#0:
157; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
158; SSE-NEXT:    retq
159;
160; AVX-LABEL: shuffle_v2f64_11:
161; AVX:       # BB#0:
162; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
163; AVX-NEXT:    retq
164  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
165  ret <2 x double> %shuffle
166}
; Double variant of the <2, 2> mask: both result lanes take element 0 of %b.
; The SSE2 expectation shuffles xmm1 in place and then copies it to xmm0.
167define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
168; SSE2-LABEL: shuffle_v2f64_22:
169; SSE2:       # BB#0:
170; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
171; SSE2-NEXT:    movaps %xmm1, %xmm0
172; SSE2-NEXT:    retq
173;
174; SSE3-LABEL: shuffle_v2f64_22:
175; SSE3:       # BB#0:
176; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
177; SSE3-NEXT:    retq
178;
179; SSSE3-LABEL: shuffle_v2f64_22:
180; SSSE3:       # BB#0:
181; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
182; SSSE3-NEXT:    retq
183;
184; SSE41-LABEL: shuffle_v2f64_22:
185; SSE41:       # BB#0:
186; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
187; SSE41-NEXT:    retq
188;
189; AVX-LABEL: shuffle_v2f64_22:
190; AVX:       # BB#0:
191; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
192; AVX-NEXT:    retq
193  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
194  ret <2 x double> %shuffle
195}
; Double variant of the <3, 2> mask: swaps the two elements of %b.
196define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
197; SSE-LABEL: shuffle_v2f64_32:
198; SSE:       # BB#0:
199; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
200; SSE-NEXT:    movapd %xmm1, %xmm0
201; SSE-NEXT:    retq
202;
203; AVX-LABEL: shuffle_v2f64_32:
204; AVX:       # BB#0:
205; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
206; AVX-NEXT:    retq
207
208  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
209  ret <2 x double> %shuffle
210}
; Double variant of the <3, 3> mask: both result lanes take element 1 of %b.
211define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
212; SSE-LABEL: shuffle_v2f64_33:
213; SSE:       # BB#0:
214; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
215; SSE-NEXT:    movaps %xmm1, %xmm0
216; SSE-NEXT:    retq
217;
218; AVX-LABEL: shuffle_v2f64_33:
219; AVX:       # BB#0:
220; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
221; AVX-NEXT:    retq
222  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
223  ret <2 x double> %shuffle
224}
; Mask <0, 3>: lane 0 is element 0 of %a, lane 1 is element 1 of %b.
; Expected as movsd before SSE4.1 and as a blendpd/vblendpd from SSE4.1 on.
225define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
226; SSE2-LABEL: shuffle_v2f64_03:
227; SSE2:       # BB#0:
228; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
229; SSE2-NEXT:    movapd %xmm1, %xmm0
230; SSE2-NEXT:    retq
231;
232; SSE3-LABEL: shuffle_v2f64_03:
233; SSE3:       # BB#0:
234; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
235; SSE3-NEXT:    movapd %xmm1, %xmm0
236; SSE3-NEXT:    retq
237;
238; SSSE3-LABEL: shuffle_v2f64_03:
239; SSSE3:       # BB#0:
240; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
241; SSSE3-NEXT:    movapd %xmm1, %xmm0
242; SSSE3-NEXT:    retq
243;
244; SSE41-LABEL: shuffle_v2f64_03:
245; SSE41:       # BB#0:
246; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
247; SSE41-NEXT:    retq
248;
249; AVX-LABEL: shuffle_v2f64_03:
250; AVX:       # BB#0:
251; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
252; AVX-NEXT:    retq
253  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
254  ret <2 x double> %shuffle
255}
; Mask <2, 1>: lane 0 is element 0 of %b, lane 1 is element 1 of %a.
256define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
257; SSE2-LABEL: shuffle_v2f64_21:
258; SSE2:       # BB#0:
259; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
260; SSE2-NEXT:    retq
261;
262; SSE3-LABEL: shuffle_v2f64_21:
263; SSE3:       # BB#0:
264; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
265; SSE3-NEXT:    retq
266;
267; SSSE3-LABEL: shuffle_v2f64_21:
268; SSSE3:       # BB#0:
269; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
270; SSSE3-NEXT:    retq
271;
272; SSE41-LABEL: shuffle_v2f64_21:
273; SSE41:       # BB#0:
274; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
275; SSE41-NEXT:    retq
276;
277; AVX-LABEL: shuffle_v2f64_21:
278; AVX:       # BB#0:
279; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
280; AVX-NEXT:    retq
281  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
282  ret <2 x double> %shuffle
283}
284
285
; Mask <0, 2>: lane 0 is element 0 of %a, lane 1 is element 0 of %b.
286define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
287; SSE-LABEL: shuffle_v2i64_02:
288; SSE:       # BB#0:
289; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
290; SSE-NEXT:    retq
291;
292; AVX-LABEL: shuffle_v2i64_02:
293; AVX:       # BB#0:
294; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
295; AVX-NEXT:    retq
296  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
297  ret <2 x i64> %shuffle
298}
; Same <0, 2> mask as shuffle_v2i64_02, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0 (an extra movdqa in the non-VEX expectation).
299define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
300; SSE-LABEL: shuffle_v2i64_02_copy:
301; SSE:       # BB#0:
302; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
303; SSE-NEXT:    movdqa %xmm1, %xmm0
304; SSE-NEXT:    retq
305;
306; AVX-LABEL: shuffle_v2i64_02_copy:
307; AVX:       # BB#0:
308; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
309; AVX-NEXT:    retq
310  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
311  ret <2 x i64> %shuffle
312}
; Mask <0, 3>: lane 0 is element 0 of %a, lane 1 is element 1 of %b.
; Expected as movsd before SSE4.1, pblendw for SSE4.1 and AVX1, and vpblendd
; for the AVX2 and AVX512VL runs.
313define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
314; SSE2-LABEL: shuffle_v2i64_03:
315; SSE2:       # BB#0:
316; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
317; SSE2-NEXT:    movapd %xmm1, %xmm0
318; SSE2-NEXT:    retq
319;
320; SSE3-LABEL: shuffle_v2i64_03:
321; SSE3:       # BB#0:
322; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
323; SSE3-NEXT:    movapd %xmm1, %xmm0
324; SSE3-NEXT:    retq
325;
326; SSSE3-LABEL: shuffle_v2i64_03:
327; SSSE3:       # BB#0:
328; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
329; SSSE3-NEXT:    movapd %xmm1, %xmm0
330; SSSE3-NEXT:    retq
331;
332; SSE41-LABEL: shuffle_v2i64_03:
333; SSE41:       # BB#0:
334; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
335; SSE41-NEXT:    retq
336;
337; AVX1-LABEL: shuffle_v2i64_03:
338; AVX1:       # BB#0:
339; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
340; AVX1-NEXT:    retq
341;
342; AVX2-LABEL: shuffle_v2i64_03:
343; AVX2:       # BB#0:
344; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
345; AVX2-NEXT:    retq
346;
347; AVX512VL-LABEL: shuffle_v2i64_03:
348; AVX512VL:       # BB#0:
349; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
350; AVX512VL-NEXT:    retq
351  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
352  ret <2 x i64> %shuffle
353}
; Same <0, 3> mask as shuffle_v2i64_03, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0.
354define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
355; SSE2-LABEL: shuffle_v2i64_03_copy:
356; SSE2:       # BB#0:
357; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
358; SSE2-NEXT:    movapd %xmm2, %xmm0
359; SSE2-NEXT:    retq
360;
361; SSE3-LABEL: shuffle_v2i64_03_copy:
362; SSE3:       # BB#0:
363; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
364; SSE3-NEXT:    movapd %xmm2, %xmm0
365; SSE3-NEXT:    retq
366;
367; SSSE3-LABEL: shuffle_v2i64_03_copy:
368; SSSE3:       # BB#0:
369; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
370; SSSE3-NEXT:    movapd %xmm2, %xmm0
371; SSSE3-NEXT:    retq
372;
373; SSE41-LABEL: shuffle_v2i64_03_copy:
374; SSE41:       # BB#0:
375; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
376; SSE41-NEXT:    movdqa %xmm1, %xmm0
377; SSE41-NEXT:    retq
378;
379; AVX1-LABEL: shuffle_v2i64_03_copy:
380; AVX1:       # BB#0:
381; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
382; AVX1-NEXT:    retq
383;
384; AVX2-LABEL: shuffle_v2i64_03_copy:
385; AVX2:       # BB#0:
386; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
387; AVX2-NEXT:    retq
388;
389; AVX512VL-LABEL: shuffle_v2i64_03_copy:
390; AVX512VL:       # BB#0:
391; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
392; AVX512VL-NEXT:    retq
393  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
394  ret <2 x i64> %shuffle
395}
; Mask <1, 2>: lane 0 is element 1 of %a, lane 1 is element 0 of %b.
; Expected as shufpd for the SSE2/SSE3 runs and as palignr/vpalignr once
; SSSE3 is available.
396define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
397; SSE2-LABEL: shuffle_v2i64_12:
398; SSE2:       # BB#0:
399; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
400; SSE2-NEXT:    retq
401;
402; SSE3-LABEL: shuffle_v2i64_12:
403; SSE3:       # BB#0:
404; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
405; SSE3-NEXT:    retq
406;
407; SSSE3-LABEL: shuffle_v2i64_12:
408; SSSE3:       # BB#0:
409; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
410; SSSE3-NEXT:    movdqa %xmm1, %xmm0
411; SSSE3-NEXT:    retq
412;
413; SSE41-LABEL: shuffle_v2i64_12:
414; SSE41:       # BB#0:
415; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
416; SSE41-NEXT:    movdqa %xmm1, %xmm0
417; SSE41-NEXT:    retq
418;
419; AVX-LABEL: shuffle_v2i64_12:
420; AVX:       # BB#0:
421; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
422; AVX-NEXT:    retq
423  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
424  ret <2 x i64> %shuffle
425}
; Same <1, 2> mask as shuffle_v2i64_12, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0.
426define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
427; SSE2-LABEL: shuffle_v2i64_12_copy:
428; SSE2:       # BB#0:
429; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
430; SSE2-NEXT:    movapd %xmm1, %xmm0
431; SSE2-NEXT:    retq
432;
433; SSE3-LABEL: shuffle_v2i64_12_copy:
434; SSE3:       # BB#0:
435; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
436; SSE3-NEXT:    movapd %xmm1, %xmm0
437; SSE3-NEXT:    retq
438;
439; SSSE3-LABEL: shuffle_v2i64_12_copy:
440; SSSE3:       # BB#0:
441; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
442; SSSE3-NEXT:    movdqa %xmm2, %xmm0
443; SSSE3-NEXT:    retq
444;
445; SSE41-LABEL: shuffle_v2i64_12_copy:
446; SSE41:       # BB#0:
447; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
448; SSE41-NEXT:    movdqa %xmm2, %xmm0
449; SSE41-NEXT:    retq
450;
451; AVX-LABEL: shuffle_v2i64_12_copy:
452; AVX:       # BB#0:
453; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
454; AVX-NEXT:    retq
455  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
456  ret <2 x i64> %shuffle
457}
; Mask <1, 3>: lane 0 is element 1 of %a, lane 1 is element 1 of %b.
458define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
459; SSE-LABEL: shuffle_v2i64_13:
460; SSE:       # BB#0:
461; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
462; SSE-NEXT:    retq
463;
464; AVX-LABEL: shuffle_v2i64_13:
465; AVX:       # BB#0:
466; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
467; AVX-NEXT:    retq
468  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
469  ret <2 x i64> %shuffle
470}
; Same <1, 3> mask as shuffle_v2i64_13, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0.
471define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
472; SSE-LABEL: shuffle_v2i64_13_copy:
473; SSE:       # BB#0:
474; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
475; SSE-NEXT:    movdqa %xmm1, %xmm0
476; SSE-NEXT:    retq
477;
478; AVX-LABEL: shuffle_v2i64_13_copy:
479; AVX:       # BB#0:
480; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
481; AVX-NEXT:    retq
482  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
483  ret <2 x i64> %shuffle
484}
; Mask <2, 0>: lane 0 is element 0 of %b, lane 1 is element 0 of %a
; (the operand-swapped form of the <0, 2> mask).
485define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
486; SSE-LABEL: shuffle_v2i64_20:
487; SSE:       # BB#0:
488; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
489; SSE-NEXT:    movdqa %xmm1, %xmm0
490; SSE-NEXT:    retq
491;
492; AVX-LABEL: shuffle_v2i64_20:
493; AVX:       # BB#0:
494; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
495; AVX-NEXT:    retq
496  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
497  ret <2 x i64> %shuffle
498}
; Same <2, 0> mask as shuffle_v2i64_20, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0.
499define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
500; SSE-LABEL: shuffle_v2i64_20_copy:
501; SSE:       # BB#0:
502; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
503; SSE-NEXT:    movdqa %xmm2, %xmm0
504; SSE-NEXT:    retq
505;
506; AVX-LABEL: shuffle_v2i64_20_copy:
507; AVX:       # BB#0:
508; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
509; AVX-NEXT:    retq
510  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
511  ret <2 x i64> %shuffle
512}
; Mask <2, 1>: lane 0 is element 0 of %b, lane 1 is element 1 of %a.
; Expected as movsd before SSE4.1, pblendw for SSE4.1 and AVX1, and vpblendd
; for the AVX2 and AVX512VL runs.
513define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
514; SSE2-LABEL: shuffle_v2i64_21:
515; SSE2:       # BB#0:
516; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
517; SSE2-NEXT:    retq
518;
519; SSE3-LABEL: shuffle_v2i64_21:
520; SSE3:       # BB#0:
521; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
522; SSE3-NEXT:    retq
523;
524; SSSE3-LABEL: shuffle_v2i64_21:
525; SSSE3:       # BB#0:
526; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
527; SSSE3-NEXT:    retq
528;
529; SSE41-LABEL: shuffle_v2i64_21:
530; SSE41:       # BB#0:
531; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
532; SSE41-NEXT:    retq
533;
534; AVX1-LABEL: shuffle_v2i64_21:
535; AVX1:       # BB#0:
536; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
537; AVX1-NEXT:    retq
538;
539; AVX2-LABEL: shuffle_v2i64_21:
540; AVX2:       # BB#0:
541; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
542; AVX2-NEXT:    retq
543;
544; AVX512VL-LABEL: shuffle_v2i64_21:
545; AVX512VL:       # BB#0:
546; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
547; AVX512VL-NEXT:    retq
548  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
549  ret <2 x i64> %shuffle
550}
; Same <2, 1> mask as shuffle_v2i64_21, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0.
551define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
552; SSE2-LABEL: shuffle_v2i64_21_copy:
553; SSE2:       # BB#0:
554; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
555; SSE2-NEXT:    movapd %xmm1, %xmm0
556; SSE2-NEXT:    retq
557;
558; SSE3-LABEL: shuffle_v2i64_21_copy:
559; SSE3:       # BB#0:
560; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
561; SSE3-NEXT:    movapd %xmm1, %xmm0
562; SSE3-NEXT:    retq
563;
564; SSSE3-LABEL: shuffle_v2i64_21_copy:
565; SSSE3:       # BB#0:
566; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
567; SSSE3-NEXT:    movapd %xmm1, %xmm0
568; SSSE3-NEXT:    retq
569;
570; SSE41-LABEL: shuffle_v2i64_21_copy:
571; SSE41:       # BB#0:
572; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
573; SSE41-NEXT:    movdqa %xmm1, %xmm0
574; SSE41-NEXT:    retq
575;
576; AVX1-LABEL: shuffle_v2i64_21_copy:
577; AVX1:       # BB#0:
578; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
579; AVX1-NEXT:    retq
580;
581; AVX2-LABEL: shuffle_v2i64_21_copy:
582; AVX2:       # BB#0:
583; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
584; AVX2-NEXT:    retq
585;
586; AVX512VL-LABEL: shuffle_v2i64_21_copy:
587; AVX512VL:       # BB#0:
588; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
589; AVX512VL-NEXT:    retq
590  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
591  ret <2 x i64> %shuffle
592}
; Mask <3, 0>: lane 0 is element 1 of %b, lane 1 is element 0 of %a
; (the operand-swapped form of the <1, 2> mask).
593define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
594; SSE2-LABEL: shuffle_v2i64_30:
595; SSE2:       # BB#0:
596; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
597; SSE2-NEXT:    movapd %xmm1, %xmm0
598; SSE2-NEXT:    retq
599;
600; SSE3-LABEL: shuffle_v2i64_30:
601; SSE3:       # BB#0:
602; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
603; SSE3-NEXT:    movapd %xmm1, %xmm0
604; SSE3-NEXT:    retq
605;
606; SSSE3-LABEL: shuffle_v2i64_30:
607; SSSE3:       # BB#0:
608; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
609; SSSE3-NEXT:    retq
610;
611; SSE41-LABEL: shuffle_v2i64_30:
612; SSE41:       # BB#0:
613; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
614; SSE41-NEXT:    retq
615;
616; AVX-LABEL: shuffle_v2i64_30:
617; AVX:       # BB#0:
618; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
619; AVX-NEXT:    retq
620  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
621  ret <2 x i64> %shuffle
622}
; Same <3, 0> mask as shuffle_v2i64_30, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0.
623define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
624; SSE2-LABEL: shuffle_v2i64_30_copy:
625; SSE2:       # BB#0:
626; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
627; SSE2-NEXT:    movapd %xmm2, %xmm0
628; SSE2-NEXT:    retq
629;
630; SSE3-LABEL: shuffle_v2i64_30_copy:
631; SSE3:       # BB#0:
632; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
633; SSE3-NEXT:    movapd %xmm2, %xmm0
634; SSE3-NEXT:    retq
635;
636; SSSE3-LABEL: shuffle_v2i64_30_copy:
637; SSSE3:       # BB#0:
638; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
639; SSSE3-NEXT:    movdqa %xmm1, %xmm0
640; SSSE3-NEXT:    retq
641;
642; SSE41-LABEL: shuffle_v2i64_30_copy:
643; SSE41:       # BB#0:
644; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
645; SSE41-NEXT:    movdqa %xmm1, %xmm0
646; SSE41-NEXT:    retq
647;
648; AVX-LABEL: shuffle_v2i64_30_copy:
649; AVX:       # BB#0:
650; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
651; AVX-NEXT:    retq
652  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
653  ret <2 x i64> %shuffle
654}
; Mask <3, 1>: lane 0 is element 1 of %b, lane 1 is element 1 of %a
; (the operand-swapped form of the <1, 3> mask).
655define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
656; SSE-LABEL: shuffle_v2i64_31:
657; SSE:       # BB#0:
658; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
659; SSE-NEXT:    movdqa %xmm1, %xmm0
660; SSE-NEXT:    retq
661;
662; AVX-LABEL: shuffle_v2i64_31:
663; AVX:       # BB#0:
664; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
665; AVX-NEXT:    retq
666  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
667  ret <2 x i64> %shuffle
668}
; Same <3, 1> mask as shuffle_v2i64_31, but with an unused leading %nonce
; argument: the expected code reads xmm1/xmm2 and must still produce the
; result in xmm0.
669define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
670; SSE-LABEL: shuffle_v2i64_31_copy:
671; SSE:       # BB#0:
672; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
673; SSE-NEXT:    movdqa %xmm2, %xmm0
674; SSE-NEXT:    retq
675;
676; AVX-LABEL: shuffle_v2i64_31_copy:
677; AVX:       # BB#0:
678; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
679; AVX-NEXT:    retq
680  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
681  ret <2 x i64> %shuffle
682}
683
; Second operand is zeroinitializer; mask <0, 3> keeps element 0 of %a in
; lane 0 and places zero in lane 1. Expected as a single movq/vmovq.
684define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
685; SSE-LABEL: shuffle_v2i64_0z:
686; SSE:       # BB#0:
687; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
688; SSE-NEXT:    retq
689;
690; AVX-LABEL: shuffle_v2i64_0z:
691; AVX:       # BB#0:
692; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
693; AVX-NEXT:    retq
694  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
695  ret <2 x i64> %shuffle
696}
697
; Second operand is zeroinitializer; mask <1, 3> moves element 1 of %a into
; lane 0 and places zero in lane 1. Expected as a byte shift right by 8.
698define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
699; SSE-LABEL: shuffle_v2i64_1z:
700; SSE:       # BB#0:
701; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
702; SSE-NEXT:    retq
703;
704; AVX-LABEL: shuffle_v2i64_1z:
705; AVX:       # BB#0:
706; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
707; AVX-NEXT:    retq
708  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
709  ret <2 x i64> %shuffle
710}
711
; Second operand is zeroinitializer; mask <2, 0> places zero in lane 0 and
; element 0 of %a in lane 1. Expected as a byte shift left by 8.
712define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
713; SSE-LABEL: shuffle_v2i64_z0:
714; SSE:       # BB#0:
715; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
716; SSE-NEXT:    retq
717;
718; AVX-LABEL: shuffle_v2i64_z0:
719; AVX:       # BB#0:
720; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
721; AVX-NEXT:    retq
722  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
723  ret <2 x i64> %shuffle
724}
725
; Second operand is zeroinitializer; mask <2, 1> places zero in lane 0 and
; keeps element 1 of %a in lane 1. Every expectation materialises a zero
; register in xmm1 and then blends it with xmm0.
726define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
727; SSE2-LABEL: shuffle_v2i64_z1:
728; SSE2:       # BB#0:
729; SSE2-NEXT:    xorpd %xmm1, %xmm1
730; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
731; SSE2-NEXT:    retq
732;
733; SSE3-LABEL: shuffle_v2i64_z1:
734; SSE3:       # BB#0:
735; SSE3-NEXT:    xorpd %xmm1, %xmm1
736; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
737; SSE3-NEXT:    retq
738;
739; SSSE3-LABEL: shuffle_v2i64_z1:
740; SSSE3:       # BB#0:
741; SSSE3-NEXT:    xorpd %xmm1, %xmm1
742; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
743; SSSE3-NEXT:    retq
744;
745; SSE41-LABEL: shuffle_v2i64_z1:
746; SSE41:       # BB#0:
747; SSE41-NEXT:    pxor %xmm1, %xmm1
748; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
749; SSE41-NEXT:    retq
750;
751; AVX1-LABEL: shuffle_v2i64_z1:
752; AVX1:       # BB#0:
753; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
754; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
755; AVX1-NEXT:    retq
756;
757; AVX2-LABEL: shuffle_v2i64_z1:
758; AVX2:       # BB#0:
759; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
760; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
761; AVX2-NEXT:    retq
762;
763; AVX512VL-LABEL: shuffle_v2i64_z1:
764; AVX512VL:       # BB#0:
765; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
766; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
767; AVX512VL-NEXT:    retq
768  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
769  ret <2 x i64> %shuffle
770}
771
; Double variant with a zeroinitializer second operand; mask <0, 3> keeps
; element 0 of %a in lane 0 and places zero in lane 1.
772define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
773; SSE-LABEL: shuffle_v2f64_0z:
774; SSE:       # BB#0:
775; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
776; SSE-NEXT:    retq
777;
778; AVX-LABEL: shuffle_v2f64_0z:
779; AVX:       # BB#0:
780; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
781; AVX-NEXT:    retq
782  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
783  ret <2 x double> %shuffle
784}
785
; Double variant with a zeroinitializer second operand; mask <1, 3> moves
; element 1 of %a into lane 0 and places zero in lane 1. The expectations
; zero xmm1 and then unpack the high halves.
786define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
787; SSE-LABEL: shuffle_v2f64_1z:
788; SSE:       # BB#0:
789; SSE-NEXT:    xorpd %xmm1, %xmm1
790; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
791; SSE-NEXT:    retq
792;
793; AVX1-LABEL: shuffle_v2f64_1z:
794; AVX1:       # BB#0:
795; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
796; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
797; AVX1-NEXT:    retq
798;
799; AVX2-LABEL: shuffle_v2f64_1z:
800; AVX2:       # BB#0:
801; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
802; AVX2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
803; AVX2-NEXT:    retq
804;
805; AVX512VL-LABEL: shuffle_v2f64_1z:
806; AVX512VL:       # BB#0:
807; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
808; AVX512VL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
809; AVX512VL-NEXT:    retq
810  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
811  ret <2 x double> %shuffle
812}
813
; Double variant with a zeroinitializer second operand; mask <2, 0> places
; zero in lane 0 and element 0 of %a in lane 1. The expectations zero xmm1
; and then unpack the low halves.
814define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
815; SSE-LABEL: shuffle_v2f64_z0:
816; SSE:       # BB#0:
817; SSE-NEXT:    xorpd %xmm1, %xmm1
818; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
819; SSE-NEXT:    movapd %xmm1, %xmm0
820; SSE-NEXT:    retq
821;
822; AVX1-LABEL: shuffle_v2f64_z0:
823; AVX1:       # BB#0:
824; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
825; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
826; AVX1-NEXT:    retq
827;
828; AVX2-LABEL: shuffle_v2f64_z0:
829; AVX2:       # BB#0:
830; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
831; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
832; AVX2-NEXT:    retq
833;
834; AVX512VL-LABEL: shuffle_v2f64_z0:
835; AVX512VL:       # BB#0:
836; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
837; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
838; AVX512VL-NEXT:    retq
839  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
840  ret <2 x double> %shuffle
841}
842
; Double variant with a zeroinitializer second operand; mask <2, 1> places
; zero in lane 0 and keeps element 1 of %a in lane 1.
843define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
844; SSE2-LABEL: shuffle_v2f64_z1:
845; SSE2:       # BB#0:
846; SSE2-NEXT:    xorpd %xmm1, %xmm1
847; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
848; SSE2-NEXT:    retq
849;
850; SSE3-LABEL: shuffle_v2f64_z1:
851; SSE3:       # BB#0:
852; SSE3-NEXT:    xorpd %xmm1, %xmm1
853; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
854; SSE3-NEXT:    retq
855;
856; SSSE3-LABEL: shuffle_v2f64_z1:
857; SSSE3:       # BB#0:
858; SSSE3-NEXT:    xorpd %xmm1, %xmm1
859; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
860; SSSE3-NEXT:    retq
861;
862; SSE41-LABEL: shuffle_v2f64_z1:
863; SSE41:       # BB#0:
864; SSE41-NEXT:    xorpd %xmm1, %xmm1
865; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
866; SSE41-NEXT:    retq
867;
868; AVX-LABEL: shuffle_v2f64_z1:
869; AVX:       # BB#0:
870; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
871; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
872; AVX-NEXT:    retq
873  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
874  ret <2 x double> %shuffle
875}
876
; Two shuffles separated by bitcasts. The first (mask <2, 1>) builds
; <zero, %a[1]>; the second views that as <4 x float> and swaps its two
; 64-bit halves with mask <2, 3, 0, 1>. The combined result is <%a[1], zero>,
; and the expectations show it lowered to one zeroing op plus one shufpd.
877define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
878; SSE-LABEL: shuffle_v2f64_bitcast_1z:
879; SSE:       # BB#0:
880; SSE-NEXT:    xorpd %xmm1, %xmm1
881; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
882; SSE-NEXT:    retq
883;
884; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
885; AVX1:       # BB#0:
886; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
887; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
888; AVX1-NEXT:    retq
889;
890; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
891; AVX2:       # BB#0:
892; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
893; AVX2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
894; AVX2-NEXT:    retq
895;
896; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
897; AVX512VL:       # BB#0:
898; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
899; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
900; AVX512VL-NEXT:    retq
901  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
902  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
903  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
904  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
905  ret <2 x double> %bitcast64
906}
907
; Views %x as <4 x float>, replaces float lane 0 with the constant 1.0 (mask
; index 4), views the result as <2 x i64> again and ANDs it with
; <0xFFFFFFFF00000000, -1>, which clears the low 32 bits of element 0. The net
; effect is %x with its lowest 32-bit lane zeroed (the "z123" in the name).
; NOTE(review): the expected output still loads the constant and blends it in
; before zeroing that same lane, so these assertions appear to record current
; codegen rather than an ideal sequence - confirm before relying on them.
908define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
909; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
910; SSE2:       # BB#0:
911; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
912; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
913; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
914; SSE2-NEXT:    retq
915;
916; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
917; SSE3:       # BB#0:
918; SSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
919; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
920; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
921; SSE3-NEXT:    retq
922;
923; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
924; SSSE3:       # BB#0:
925; SSSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
926; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
927; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
928; SSSE3-NEXT:    retq
929;
930; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
931; SSE41:       # BB#0:
932; SSE41-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
933; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
934; SSE41-NEXT:    xorps %xmm1, %xmm1
935; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
936; SSE41-NEXT:    retq
937;
938; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
939; AVX1:       # BB#0:
940; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
941; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
942; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
943; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
944; AVX1-NEXT:    retq
945;
946; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
947; AVX2:       # BB#0:
948; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
949; AVX2-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
950; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
951; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
952; AVX2-NEXT:    retq
953;
954; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
955; AVX512VL:       # BB#0:
956; AVX512VL-NEXT:    vmovss {{.*}}(%rip), %xmm1
957; AVX512VL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
958; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
959; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
960; AVX512VL-NEXT:    retq
961  %bitcast32 = bitcast <2 x i64> %x to <4 x float>
962  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
963  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
964  %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
965  ret <2 x i64> %and
966}
967
; Builds [%a, 0] from a scalar i64 argument: %a is inserted into element 0 and
; mask index 3 pulls element 1 from the zero vector. The expected code is a
; single GPR-to-XMM move (movd / vmovq from %rdi) with no separate zeroing.
968define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
969; SSE-LABEL: insert_reg_and_zero_v2i64:
970; SSE:       # BB#0:
971; SSE-NEXT:    movd %rdi, %xmm0
972; SSE-NEXT:    retq
973;
974; AVX-LABEL: insert_reg_and_zero_v2i64:
975; AVX:       # BB#0:
976; AVX-NEXT:    vmovq %rdi, %xmm0
977; AVX-NEXT:    retq
978  %v = insertelement <2 x i64> undef, i64 %a, i32 0
979  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
980  ret <2 x i64> %shuffle
981}
982
; Memory variant of the insert-and-zero pattern: the i64 loaded from %ptr goes
; into element 0 and element 1 comes from the zero vector, giving [*ptr, 0].
; Each run is expected to fold this into one movq / vmovq load.
983define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
984; SSE-LABEL: insert_mem_and_zero_v2i64:
985; SSE:       # BB#0:
986; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
987; SSE-NEXT:    retq
988;
989; AVX1-LABEL: insert_mem_and_zero_v2i64:
990; AVX1:       # BB#0:
991; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
992; AVX1-NEXT:    retq
993;
994; AVX2-LABEL: insert_mem_and_zero_v2i64:
995; AVX2:       # BB#0:
996; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
997; AVX2-NEXT:    retq
998;
999; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1000; AVX512VL:       # BB#0:
1001; AVX512VL-NEXT:    vmovq (%rdi), %xmm0
1002; AVX512VL-NEXT:    retq
1003  %a = load i64, i64* %ptr
1004  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1005  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1006  ret <2 x i64> %shuffle
1007}
1008
; Builds [%a, 0.0] from a scalar double argument: %a is inserted into element 0
; and mask index 3 takes element 1 from the zero vector. The expected code is
; a register-to-register movq / vmovq that clears the upper element.
1009define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1010; SSE-LABEL: insert_reg_and_zero_v2f64:
1011; SSE:       # BB#0:
1012; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1013; SSE-NEXT:    retq
1014;
1015; AVX-LABEL: insert_reg_and_zero_v2f64:
1016; AVX:       # BB#0:
1017; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1018; AVX-NEXT:    retq
1019  %v = insertelement <2 x double> undef, double %a, i32 0
1020  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1021  ret <2 x double> %shuffle
1022}
1023
; Memory variant for doubles: the value loaded from %ptr lands in element 0
; and element 1 comes from the zero vector, giving [*ptr, 0.0]. Each run is
; expected to fold this into one movsd / vmovsd load.
1024define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1025; SSE-LABEL: insert_mem_and_zero_v2f64:
1026; SSE:       # BB#0:
1027; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1028; SSE-NEXT:    retq
1029;
1030; AVX1-LABEL: insert_mem_and_zero_v2f64:
1031; AVX1:       # BB#0:
1032; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1033; AVX1-NEXT:    retq
1034;
1035; AVX2-LABEL: insert_mem_and_zero_v2f64:
1036; AVX2:       # BB#0:
1037; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1038; AVX2-NEXT:    retq
1039;
1040; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1041; AVX512VL:       # BB#0:
1042; AVX512VL-NEXT:    vmovsd (%rdi), %xmm0
1043; AVX512VL-NEXT:    retq
1044  %a = load double, double* %ptr
1045  %v = insertelement <2 x double> undef, double %a, i32 0
1046  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1047  ret <2 x double> %shuffle
1048}
1049
; Replaces the low element of %b with the scalar %a: mask <0,3> yields
; [%a, %b[1]]. The assertions expect a GPR-to-XMM move followed by movsd up to
; SSSE3, pblendw for SSE4.1 and AVX1, and vpblendd for AVX2 and AVX512VL.
1050define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1051; SSE2-LABEL: insert_reg_lo_v2i64:
1052; SSE2:       # BB#0:
1053; SSE2-NEXT:    movd %rdi, %xmm1
1054; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1055; SSE2-NEXT:    retq
1056;
1057; SSE3-LABEL: insert_reg_lo_v2i64:
1058; SSE3:       # BB#0:
1059; SSE3-NEXT:    movd %rdi, %xmm1
1060; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1061; SSE3-NEXT:    retq
1062;
1063; SSSE3-LABEL: insert_reg_lo_v2i64:
1064; SSSE3:       # BB#0:
1065; SSSE3-NEXT:    movd %rdi, %xmm1
1066; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1067; SSSE3-NEXT:    retq
1068;
1069; SSE41-LABEL: insert_reg_lo_v2i64:
1070; SSE41:       # BB#0:
1071; SSE41-NEXT:    movd %rdi, %xmm1
1072; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1073; SSE41-NEXT:    retq
1074;
1075; AVX1-LABEL: insert_reg_lo_v2i64:
1076; AVX1:       # BB#0:
1077; AVX1-NEXT:    vmovq %rdi, %xmm1
1078; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1079; AVX1-NEXT:    retq
1080;
1081; AVX2-LABEL: insert_reg_lo_v2i64:
1082; AVX2:       # BB#0:
1083; AVX2-NEXT:    vmovq %rdi, %xmm1
1084; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1085; AVX2-NEXT:    retq
1086;
1087; AVX512VL-LABEL: insert_reg_lo_v2i64:
1088; AVX512VL:       # BB#0:
1089; AVX512VL-NEXT:    vmovq %rdi, %xmm1
1090; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1091; AVX512VL-NEXT:    retq
1092  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1093  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1094  ret <2 x i64> %shuffle
1095}
1096
; Replaces the low element of %b with an i64 loaded from %ptr: mask <0,3>
; yields [*ptr, %b[1]]. Up to SSSE3 the load is expected to fold into a single
; movlpd; the SSE4.1 and AVX runs instead expect a movq load followed by an
; integer blend.
1097define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1098; SSE2-LABEL: insert_mem_lo_v2i64:
1099; SSE2:       # BB#0:
1100; SSE2-NEXT:    movlpd (%rdi), %xmm0
1101; SSE2-NEXT:    retq
1102;
1103; SSE3-LABEL: insert_mem_lo_v2i64:
1104; SSE3:       # BB#0:
1105; SSE3-NEXT:    movlpd (%rdi), %xmm0
1106; SSE3-NEXT:    retq
1107;
1108; SSSE3-LABEL: insert_mem_lo_v2i64:
1109; SSSE3:       # BB#0:
1110; SSSE3-NEXT:    movlpd (%rdi), %xmm0
1111; SSSE3-NEXT:    retq
1112;
1113; SSE41-LABEL: insert_mem_lo_v2i64:
1114; SSE41:       # BB#0:
1115; SSE41-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1116; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1117; SSE41-NEXT:    retq
1118;
1119; AVX1-LABEL: insert_mem_lo_v2i64:
1120; AVX1:       # BB#0:
1121; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1122; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1123; AVX1-NEXT:    retq
1124;
1125; AVX2-LABEL: insert_mem_lo_v2i64:
1126; AVX2:       # BB#0:
1127; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1128; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1129; AVX2-NEXT:    retq
1130;
1131; AVX512VL-LABEL: insert_mem_lo_v2i64:
1132; AVX512VL:       # BB#0:
1133; AVX512VL-NEXT:    vmovq (%rdi), %xmm1
1134; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1135; AVX512VL-NEXT:    retq
1136  %a = load i64, i64* %ptr
1137  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1138  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1139  ret <2 x i64> %shuffle
1140}
1141
; Replaces the high element of %b with the scalar %a: mask <2,0> takes %b[0]
; for lane 0 and the inserted %a for lane 1, giving [%b[0], %a]. The expected
; code moves %rdi into an XMM register and interleaves with punpcklqdq.
1142define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1143; SSE-LABEL: insert_reg_hi_v2i64:
1144; SSE:       # BB#0:
1145; SSE-NEXT:    movd %rdi, %xmm1
1146; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1147; SSE-NEXT:    retq
1148;
1149; AVX-LABEL: insert_reg_hi_v2i64:
1150; AVX:       # BB#0:
1151; AVX-NEXT:    vmovq %rdi, %xmm1
1152; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1153; AVX-NEXT:    retq
1154  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1155  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1156  ret <2 x i64> %shuffle
1157}
1158
; Replaces the high element of %b with an i64 loaded from %ptr: mask <2,0>
; yields [%b[0], *ptr]. Every run is expected to load with movq / vmovq and
; then interleave the low quadwords with punpcklqdq.
1159define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1160; SSE-LABEL: insert_mem_hi_v2i64:
1161; SSE:       # BB#0:
1162; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1163; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1164; SSE-NEXT:    retq
1165;
1166; AVX1-LABEL: insert_mem_hi_v2i64:
1167; AVX1:       # BB#0:
1168; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1169; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1170; AVX1-NEXT:    retq
1171;
1172; AVX2-LABEL: insert_mem_hi_v2i64:
1173; AVX2:       # BB#0:
1174; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1175; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1176; AVX2-NEXT:    retq
1177;
1178; AVX512VL-LABEL: insert_mem_hi_v2i64:
1179; AVX512VL:       # BB#0:
1180; AVX512VL-NEXT:    vmovq (%rdi), %xmm1
1181; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1182; AVX512VL-NEXT:    retq
1183  %a = load i64, i64* %ptr
1184  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1185  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1186  ret <2 x i64> %shuffle
1187}
1188
; Replaces the low element of %b with the scalar double %a: mask <0,3> yields
; [%a, %b[1]]. The non-AVX runs expect a two-operand movsd into %xmm1 plus a
; movapd copy back to %xmm0; the AVX runs expect a single three-operand vmovsd.
1189define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1190; SSE-LABEL: insert_reg_lo_v2f64:
1191; SSE:       # BB#0:
1192; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1193; SSE-NEXT:    movapd %xmm1, %xmm0
1194; SSE-NEXT:    retq
1195;
1196; AVX1-LABEL: insert_reg_lo_v2f64:
1197; AVX1:       # BB#0:
1198; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1199; AVX1-NEXT:    retq
1200;
1201; AVX2-LABEL: insert_reg_lo_v2f64:
1202; AVX2:       # BB#0:
1203; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1204; AVX2-NEXT:    retq
1205;
1206; AVX512VL-LABEL: insert_reg_lo_v2f64:
1207; AVX512VL:       # BB#0:
1208; AVX512VL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
1209; AVX512VL-NEXT:    retq
1210  %v = insertelement <2 x double> undef, double %a, i32 0
1211  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1212  ret <2 x double> %shuffle
1213}
1214
; Replaces the low element of %b with a double loaded from %ptr: mask <0,3>
; yields [*ptr, %b[1]]. The load and the insert are expected to fold into one
; movlpd / vmovlpd with a memory operand.
1215define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1216; SSE-LABEL: insert_mem_lo_v2f64:
1217; SSE:       # BB#0:
1218; SSE-NEXT:    movlpd (%rdi), %xmm0
1219; SSE-NEXT:    retq
1220;
1221; AVX-LABEL: insert_mem_lo_v2f64:
1222; AVX:       # BB#0:
1223; AVX-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0
1224; AVX-NEXT:    retq
1225  %a = load double, double* %ptr
1226  %v = insertelement <2 x double> undef, double %a, i32 0
1227  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1228  ret <2 x double> %shuffle
1229}
1230
; Replaces the high element of %b with the scalar double %a: mask <2,0> yields
; [%b[0], %a]. The non-AVX runs expect unpcklpd into %xmm1 plus a movapd copy
; back to %xmm0; the AVX runs expect a single three-operand vunpcklpd.
1231define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1232; SSE-LABEL: insert_reg_hi_v2f64:
1233; SSE:       # BB#0:
1234; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1235; SSE-NEXT:    movapd %xmm1, %xmm0
1236; SSE-NEXT:    retq
1237;
1238; AVX-LABEL: insert_reg_hi_v2f64:
1239; AVX:       # BB#0:
1240; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1241; AVX-NEXT:    retq
1242  %v = insertelement <2 x double> undef, double %a, i32 0
1243  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1244  ret <2 x double> %shuffle
1245}
1246
; Replaces the high element of %b with a double loaded from %ptr: mask <2,0>
; yields [%b[0], *ptr]. The load and the insert are expected to fold into one
; movhpd / vmovhpd with a memory operand.
1247define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1248; SSE-LABEL: insert_mem_hi_v2f64:
1249; SSE:       # BB#0:
1250; SSE-NEXT:    movhpd (%rdi), %xmm0
1251; SSE-NEXT:    retq
1252;
1253; AVX-LABEL: insert_mem_hi_v2f64:
1254; AVX:       # BB#0:
1255; AVX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0
1256; AVX-NEXT:    retq
1257  %a = load double, double* %ptr
1258  %v = insertelement <2 x double> undef, double %a, i32 0
1259  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1260  ret <2 x double> %shuffle
1261}
1262
; Splats a scalar double argument into both lanes: %a is inserted at element 0
; and mask <0,0> duplicates it, giving [%a, %a]. The plain SSE2 run expects
; movlhps; runs with SSE3 or later expect movddup / vmovddup.
1263define <2 x double> @insert_dup_reg_v2f64(double %a) {
1264; SSE2-LABEL: insert_dup_reg_v2f64:
1265; SSE2:       # BB#0:
1266; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1267; SSE2-NEXT:    retq
1268;
1269; SSE3-LABEL: insert_dup_reg_v2f64:
1270; SSE3:       # BB#0:
1271; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1272; SSE3-NEXT:    retq
1273;
1274; SSSE3-LABEL: insert_dup_reg_v2f64:
1275; SSSE3:       # BB#0:
1276; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1277; SSSE3-NEXT:    retq
1278;
1279; SSE41-LABEL: insert_dup_reg_v2f64:
1280; SSE41:       # BB#0:
1281; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1282; SSE41-NEXT:    retq
1283;
1284; AVX-LABEL: insert_dup_reg_v2f64:
1285; AVX:       # BB#0:
1286; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1287; AVX-NEXT:    retq
1288  %v = insertelement <2 x double> undef, double %a, i32 0
1289  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1290  ret <2 x double> %shuffle
1291}
1292
; Splats a double loaded from %ptr into both lanes, giving [*ptr, *ptr]. The
; plain SSE2 run expects a movsd load followed by movlhps; runs with SSE3 or
; later expect the load to fold into a single movddup / vmovddup.
1293define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1294; SSE2-LABEL: insert_dup_mem_v2f64:
1295; SSE2:       # BB#0:
1296; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1297; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1298; SSE2-NEXT:    retq
1299;
1300; SSE3-LABEL: insert_dup_mem_v2f64:
1301; SSE3:       # BB#0:
1302; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1303; SSE3-NEXT:    retq
1304;
1305; SSSE3-LABEL: insert_dup_mem_v2f64:
1306; SSSE3:       # BB#0:
1307; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1308; SSSE3-NEXT:    retq
1309;
1310; SSE41-LABEL: insert_dup_mem_v2f64:
1311; SSE41:       # BB#0:
1312; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1313; SSE41-NEXT:    retq
1314;
1315; AVX-LABEL: insert_dup_mem_v2f64:
1316; AVX:       # BB#0:
1317; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1318; AVX-NEXT:    retq
1319  %a = load double, double* %ptr
1320  %v = insertelement <2 x double> undef, double %a, i32 0
1321  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1322  ret <2 x double> %shuffle
1323}
1324
; Splats element 0 of a full <2 x double> vector loaded from %ptr. Unlike the
; scalar-load variant, the IR loads all 128 bits and only then duplicates lane
; 0. The plain SSE2 run expects a movaps load plus movlhps; runs with SSE3 or
; later expect a single movddup / vmovddup with a memory operand.
1325define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1326; SSE2-LABEL: insert_dup_mem128_v2f64:
1327; SSE2:       # BB#0:
1328; SSE2-NEXT:    movaps (%rdi), %xmm0
1329; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1330; SSE2-NEXT:    retq
1331;
1332; SSE3-LABEL: insert_dup_mem128_v2f64:
1333; SSE3:       # BB#0:
1334; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1335; SSE3-NEXT:    retq
1336;
1337; SSSE3-LABEL: insert_dup_mem128_v2f64:
1338; SSSE3:       # BB#0:
1339; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1340; SSSE3-NEXT:    retq
1341;
1342; SSE41-LABEL: insert_dup_mem128_v2f64:
1343; SSE41:       # BB#0:
1344; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1345; SSE41-NEXT:    retq
1346;
1347; AVX-LABEL: insert_dup_mem128_v2f64:
1348; AVX:       # BB#0:
1349; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1350; AVX-NEXT:    retq
1351  %v = load  <2 x double>,  <2 x double>* %ptr
1352  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1353  ret <2 x double> %shuffle
1354}
1355
1356
; Splats an i64 loaded from %ptr (the load is marked align 1) into both lanes.
; The zeroinitializer mask is equivalent to <0,0>. The non-AVX and AVX1 runs
; expect a movq load followed by pshufd; the AVX2 and AVX512VL runs expect a
; single vpbroadcastq from memory.
1357define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1358; SSE-LABEL: insert_dup_mem_v2i64:
1359; SSE:       # BB#0:
1360; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1361; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1362; SSE-NEXT:    retq
1363;
1364; AVX1-LABEL: insert_dup_mem_v2i64:
1365; AVX1:       # BB#0:
1366; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
1367; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1368; AVX1-NEXT:    retq
1369;
1370; AVX2-LABEL: insert_dup_mem_v2i64:
1371; AVX2:       # BB#0:
1372; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
1373; AVX2-NEXT:    retq
1374;
1375; AVX512VL-LABEL: insert_dup_mem_v2i64:
1376; AVX512VL:       # BB#0:
1377; AVX512VL-NEXT:    vpbroadcastq (%rdi), %xmm0
1378; AVX512VL-NEXT:    retq
1379  %tmp = load i64, i64* %ptr, align 1
1380  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1381  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
1382  ret <2 x i64> %tmp2
1383}
1384
; Swaps the two elements of a <2 x double> loaded from %ptr (mask <1,0>). The
; non-AVX runs expect a separate movapd load followed by shufpd; the AVX runs
; expect the load to fold into vpermilpd as a memory operand.
1385define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1386; SSE-LABEL: shuffle_mem_v2f64_10:
1387; SSE:       # BB#0:
1388; SSE-NEXT:    movapd (%rdi), %xmm0
1389; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
1390; SSE-NEXT:    retq
1391;
1392; AVX-LABEL: shuffle_mem_v2f64_10:
1393; AVX:       # BB#0:
1394; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
1395; AVX-NEXT:    retq
1396
1397  %a = load <2 x double>, <2 x double>* %ptr
1398  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1399  ret <2 x double> %shuffle
1400}
1401