; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL

; Module-level data layout and target triple for this test file.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
12
; Mask <0, 0> copies element 0 of %a into both result lanes; %b is never
; selected.
define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_00:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <1, 0> swaps the two elements of %a; %b is never selected.
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <1, 1> copies element 1 of %a into both result lanes; %b is never
; selected.
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %shuffle
}
; Mask <2, 2> copies element 0 of %b into both result lanes (mask indices 2
; and 3 name elements 0 and 1 of the second operand); %a is never selected.
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_22:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <3, 2> swaps the two elements of %b (mask indices 2 and 3 name
; elements 0 and 1 of the second operand); %a is never selected.
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <3, 3> copies element 1 of %b into both result lanes (mask indices 2
; and 3 name elements 0 and 1 of the second operand); %a is never selected.
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %shuffle
}
111
; Mask <0, 0> copies element 0 of %a into both result lanes; %b is never
; selected. Floating-point counterpart of the <2 x i64> splat test.
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3:       # BB#0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
; Mask <1, 0> swaps the two elements of %a; %b is never selected.
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE:       # BB#0:
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}
; Mask <1, 1> copies element 1 of %a into both result lanes; %b is never
; selected.
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE:       # BB#0:
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %shuffle
}
; Mask <2, 2> copies element 0 of %b into both result lanes (mask indices 2
; and 3 name elements 0 and 1 of the second operand); %a is never selected.
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3:       # BB#0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %shuffle
}
; Mask <3, 2> swaps the two elements of %b (mask indices 2 and 3 name
; elements 0 and 1 of the second operand); %a is never selected.
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE:       # BB#0:
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %shuffle
}
; Mask <3, 3> copies element 1 of %b into both result lanes (mask indices 2
; and 3 name elements 0 and 1 of the second operand); %a is never selected.
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE:       # BB#0:
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %shuffle
}
; Mask <0, 3> keeps each lane in place while mixing sources. Result lane 0 is
; element 0 of %a and result lane 1 is element 1 of %b (mask index 3).
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
; Mask <2, 1> keeps each lane in place while mixing sources. Result lane 0 is
; element 0 of %b (mask index 2) and result lane 1 is element 1 of %a.
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}
284
285
; Mask <0, 2> pairs the low elements of both operands. Result lane 0 is
; element 0 of %a and result lane 1 is element 0 of %b (mask index 2).
define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <0, 2> pairs element 0 of %a with element 0 of %b (mask index 2).
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <0, 3> keeps each lane in place while mixing sources. Result lane 0 is
; element 0 of %a and result lane 1 is element 1 of %b (mask index 3).
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_03:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_03:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_03:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
; Mask <0, 3> takes element 0 of %a and element 1 of %b (mask index 3).
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_03_copy:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_03_copy:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_03_copy:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
; Mask <1, 2> takes the high element of %a followed by the low element of %b.
; Result lane 0 is element 1 of %a and result lane 1 is element 0 of %b
; (mask index 2).
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <1, 2> takes element 1 of %a and element 0 of %b (mask index 2).
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <1, 3> pairs the high elements of both operands. Result lane 0 is
; element 1 of %a and result lane 1 is element 1 of %b (mask index 3).
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
; Mask <1, 3> pairs element 1 of %a with element 1 of %b (mask index 3).
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
; Mask <2, 0> pairs the low elements with the operands reversed. Result lane 0
; is element 0 of %b (mask index 2) and result lane 1 is element 0 of %a.
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <2, 0> takes element 0 of %b (mask index 2) and then element 0 of %a.
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <2, 1> keeps each lane in place while mixing sources. Result lane 0 is
; element 0 of %b (mask index 2) and result lane 1 is element 1 of %a.
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_21:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_21:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_21:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
; Mask <2, 1> takes element 0 of %b (mask index 2) and element 1 of %a.
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_21_copy:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_21_copy:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_21_copy:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
; Mask <3, 0> takes the high element of %b followed by the low element of %a.
; Result lane 0 is element 1 of %b (mask index 3) and result lane 1 is
; element 0 of %a.
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <3, 0> takes element 1 of %b (mask index 3) and then element 0 of %a.
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <3, 1> pairs the high elements with the operands reversed. Result lane 0
; is element 1 of %b (mask index 3) and result lane 1 is element 1 of %a.
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}
; Mask <3, 1> takes element 1 of %b (mask index 3) and then element 1 of %a.
; %nonce is never read; it occupies the first argument slot, so the shuffle
; inputs are the second and third arguments (xmm1/xmm2 in the expected
; output) and the result has to end up in xmm0.
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}
683
; The second shuffle operand is zeroinitializer. Mask <0, 3> keeps element 0
; of %a in lane 0 and puts a zero element (mask index 3) in lane 1.
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
697
; The second shuffle operand is zeroinitializer. Mask <1, 3> moves element 1
; of %a into lane 0 and puts a zero element (mask index 3) in lane 1.
define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE:       # BB#0:
; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX:       # BB#0:
; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
711
; The second shuffle operand is zeroinitializer. Mask <2, 0> puts a zero
; element (mask index 2) in lane 0 and moves element 0 of %a into lane 1.
define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE:       # BB#0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX:       # BB#0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
725
; The second shuffle operand is zeroinitializer. Mask <2, 1> puts a zero
; element (mask index 2) in lane 0 and keeps element 1 of %a in lane 1.
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_z1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_z1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_z1:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
771
; The second shuffle operand is zeroinitializer. Mask <0, 3> keeps element 0
; of %a in lane 0 and puts a zero element (mask index 3) in lane 1.
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
785
; The second shuffle operand is zeroinitializer. Mask <1, 3> moves element 1
; of %a into lane 0 and puts a zero element (mask index 3) in lane 1.
define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2f64_1z:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2f64_1z:
; AVX2:       # BB#0:
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %shuffle
}
813
; The second shuffle operand is zeroinitializer. Mask <2, 0> puts a zero
; element (mask index 2) in lane 0 and moves element 0 of %a into lane 1.
define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2f64_z0:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2f64_z0:
; AVX2:       # BB#0:
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}
842
; The second shuffle operand is zeroinitializer. Mask <2, 1> puts a zero
; element (mask index 2) in lane 0 and keeps element 1 of %a in lane 1.
define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2f64_z1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2f64_z1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2f64_z1:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}
888
; Two shuffles separated by bitcasts: the first (mask <2, 1>) zeroes lane 0 of
; %a, the second works on the <4 x float> view with mask <2, 3, 0, 1> and so
; swaps the two 64-bit halves. The net result is <%a[1], 0.0>, and the checks
; expect the pair to fold into one shufpd against a zeroed register.
889define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
890; SSE-LABEL: shuffle_v2f64_bitcast_1z:
891; SSE:       # BB#0:
892; SSE-NEXT:    xorpd %xmm1, %xmm1
893; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
894; SSE-NEXT:    retq
895;
896; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
897; AVX1:       # BB#0:
898; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
899; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
900; AVX1-NEXT:    retq
901;
902; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
903; AVX2:       # BB#0:
904; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
905; AVX2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
906; AVX2-NEXT:    retq
907;
908; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
909; AVX512VL:       # BB#0:
910; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
911; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
912; AVX512VL-NEXT:    retq
913  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
914  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
915  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
916  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
917  ret <2 x double> %bitcast64
918}
919
; Views %x as <4 x float>, replaces 32-bit lane 0 with the constant 1.0 (mask
; index 4 selects element 0 of the second operand), views the result as
; <2 x i64> again and ANDs it with <0xFFFFFFFF00000000, -1>, which clears that
; same low 32-bit lane. On SSE4.1 and the AVX runs the checks expect a plain
; blend with a zeroed register; the older SSE runs still expect the constant
; load, a movss merge and an andps with a mask from memory.
920define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
921; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
922; SSE2:       # BB#0:
923; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
924; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
925; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
926; SSE2-NEXT:    retq
927;
928; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
929; SSE3:       # BB#0:
930; SSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
931; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
932; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
933; SSE3-NEXT:    retq
934;
935; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
936; SSSE3:       # BB#0:
937; SSSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
938; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
939; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
940; SSSE3-NEXT:    retq
941;
942; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
943; SSE41:       # BB#0:
944; SSE41-NEXT:    pxor %xmm1, %xmm1
945; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
946; SSE41-NEXT:    retq
947;
948; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
949; AVX1:       # BB#0:
950; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
951; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
952; AVX1-NEXT:    retq
953;
954; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
955; AVX2:       # BB#0:
956; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
957; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
958; AVX2-NEXT:    retq
959;
960; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
961; AVX512VL:       # BB#0:
962; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
963; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
964; AVX512VL-NEXT:    retq
965  %bitcast32 = bitcast <2 x i64> %x to <4 x float>
966  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
967  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
968  %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
969  ret <2 x i64> %and
970}
971
; Builds <%a, 0>: the scalar i64 argument is inserted into lane 0 and mask
; <0, 3> takes lane 1 from the zero vector. The checks expect nothing more
; than a single move from %rdi into xmm0.
972define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
973; SSE-LABEL: insert_reg_and_zero_v2i64:
974; SSE:       # BB#0:
975; SSE-NEXT:    movd %rdi, %xmm0
976; SSE-NEXT:    retq
977;
978; AVX-LABEL: insert_reg_and_zero_v2i64:
979; AVX:       # BB#0:
980; AVX-NEXT:    vmovq %rdi, %xmm0
981; AVX-NEXT:    retq
982  %v = insertelement <2 x i64> undef, i64 %a, i32 0
983  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
984  ret <2 x i64> %shuffle
985}
986
; Memory-operand variant of the <scalar, 0> pattern: the i64 is loaded from
; %ptr, placed in lane 0, and lane 1 comes from the zero vector (mask <0, 3>).
; The checks expect one movq load whose decoded form is mem[0],zero.
987define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
988; SSE-LABEL: insert_mem_and_zero_v2i64:
989; SSE:       # BB#0:
990; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
991; SSE-NEXT:    retq
992;
993; AVX-LABEL: insert_mem_and_zero_v2i64:
994; AVX:       # BB#0:
995; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
996; AVX-NEXT:    retq
997  %a = load i64, i64* %ptr
998  %v = insertelement <2 x i64> undef, i64 %a, i32 0
999  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1000  ret <2 x i64> %shuffle
1001}
1002
; Builds <%a, 0.0> from a scalar double argument: lane 0 is %a and mask <0, 3>
; takes lane 1 from the zero vector. The checks expect a single
; register-to-register movq decoded as xmm0[0],zero.
1003define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1004; SSE-LABEL: insert_reg_and_zero_v2f64:
1005; SSE:       # BB#0:
1006; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1007; SSE-NEXT:    retq
1008;
1009; AVX-LABEL: insert_reg_and_zero_v2f64:
1010; AVX:       # BB#0:
1011; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1012; AVX-NEXT:    retq
1013  %v = insertelement <2 x double> undef, double %a, i32 0
1014  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1015  ret <2 x double> %shuffle
1016}
1017
; Memory-operand variant of <scalar, 0.0>: the double is loaded from %ptr into
; lane 0 and lane 1 comes from the zero vector (mask <0, 3>). The checks expect
; one movsd load decoded as mem[0],zero.
1018define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1019; SSE-LABEL: insert_mem_and_zero_v2f64:
1020; SSE:       # BB#0:
1021; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1022; SSE-NEXT:    retq
1023;
1024; AVX-LABEL: insert_mem_and_zero_v2f64:
1025; AVX:       # BB#0:
1026; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1027; AVX-NEXT:    retq
1028  %a = load double, double* %ptr
1029  %v = insertelement <2 x double> undef, double %a, i32 0
1030  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1031  ret <2 x double> %shuffle
1032}
1033
; Replaces the low lane of %b with the scalar %a: mask <0, 3> yields
; <%a, %b[1]>. The checks expect %rdi to be moved into xmm1 and then merged
; with movsd (runs before SSE4.1), pblendw (SSE4.1 and AVX1 runs) or vpblendd
; (AVX2 and AVX512VL runs).
1034define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1035; SSE2-LABEL: insert_reg_lo_v2i64:
1036; SSE2:       # BB#0:
1037; SSE2-NEXT:    movd %rdi, %xmm1
1038; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1039; SSE2-NEXT:    retq
1040;
1041; SSE3-LABEL: insert_reg_lo_v2i64:
1042; SSE3:       # BB#0:
1043; SSE3-NEXT:    movd %rdi, %xmm1
1044; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1045; SSE3-NEXT:    retq
1046;
1047; SSSE3-LABEL: insert_reg_lo_v2i64:
1048; SSSE3:       # BB#0:
1049; SSSE3-NEXT:    movd %rdi, %xmm1
1050; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1051; SSSE3-NEXT:    retq
1052;
1053; SSE41-LABEL: insert_reg_lo_v2i64:
1054; SSE41:       # BB#0:
1055; SSE41-NEXT:    movd %rdi, %xmm1
1056; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1057; SSE41-NEXT:    retq
1058;
1059; AVX1-LABEL: insert_reg_lo_v2i64:
1060; AVX1:       # BB#0:
1061; AVX1-NEXT:    vmovq %rdi, %xmm1
1062; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1063; AVX1-NEXT:    retq
1064;
1065; AVX2-LABEL: insert_reg_lo_v2i64:
1066; AVX2:       # BB#0:
1067; AVX2-NEXT:    vmovq %rdi, %xmm1
1068; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1069; AVX2-NEXT:    retq
1070;
1071; AVX512VL-LABEL: insert_reg_lo_v2i64:
1072; AVX512VL:       # BB#0:
1073; AVX512VL-NEXT:    vmovq %rdi, %xmm1
1074; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1075; AVX512VL-NEXT:    retq
1076  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1077  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1078  ret <2 x i64> %shuffle
1079}
1080
; Replaces the low lane of %b with an i64 loaded from %ptr: mask <0, 3> yields
; <load, %b[1]>. The runs before SSE4.1 expect the load folded into a single
; movlpd; SSE4.1 and the AVX runs expect a movq load into xmm1 followed by a
; blend (pblendw, or vpblendd on AVX2 and AVX512VL).
1081define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1082; SSE2-LABEL: insert_mem_lo_v2i64:
1083; SSE2:       # BB#0:
1084; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1085; SSE2-NEXT:    retq
1086;
1087; SSE3-LABEL: insert_mem_lo_v2i64:
1088; SSE3:       # BB#0:
1089; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1090; SSE3-NEXT:    retq
1091;
1092; SSSE3-LABEL: insert_mem_lo_v2i64:
1093; SSSE3:       # BB#0:
1094; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1095; SSSE3-NEXT:    retq
1096;
1097; SSE41-LABEL: insert_mem_lo_v2i64:
1098; SSE41:       # BB#0:
1099; SSE41-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1100; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1101; SSE41-NEXT:    retq
1102;
1103; AVX1-LABEL: insert_mem_lo_v2i64:
1104; AVX1:       # BB#0:
1105; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1106; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1107; AVX1-NEXT:    retq
1108;
1109; AVX2-LABEL: insert_mem_lo_v2i64:
1110; AVX2:       # BB#0:
1111; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1112; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1113; AVX2-NEXT:    retq
1114;
1115; AVX512VL-LABEL: insert_mem_lo_v2i64:
1116; AVX512VL:       # BB#0:
1117; AVX512VL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1118; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1119; AVX512VL-NEXT:    retq
1120  %a = load i64, i64* %ptr
1121  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1122  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1123  ret <2 x i64> %shuffle
1124}
1125
; Replaces the high lane of %b with the scalar %a: mask <2, 0> yields
; <%b[0], %a>. The checks expect %rdi to be moved into xmm1 and combined with
; a punpcklqdq of the two low quadwords.
1126define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1127; SSE-LABEL: insert_reg_hi_v2i64:
1128; SSE:       # BB#0:
1129; SSE-NEXT:    movd %rdi, %xmm1
1130; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1131; SSE-NEXT:    retq
1132;
1133; AVX-LABEL: insert_reg_hi_v2i64:
1134; AVX:       # BB#0:
1135; AVX-NEXT:    vmovq %rdi, %xmm1
1136; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1137; AVX-NEXT:    retq
1138  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1139  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1140  ret <2 x i64> %shuffle
1141}
1142
; Replaces the high lane of %b with an i64 loaded from %ptr: mask <2, 0> yields
; <%b[0], load>. The checks expect a movq load into xmm1 followed by a
; punpcklqdq of the two low quadwords.
1143define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1144; SSE-LABEL: insert_mem_hi_v2i64:
1145; SSE:       # BB#0:
1146; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1147; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1148; SSE-NEXT:    retq
1149;
1150; AVX-LABEL: insert_mem_hi_v2i64:
1151; AVX:       # BB#0:
1152; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1153; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1154; AVX-NEXT:    retq
1155  %a = load i64, i64* %ptr
1156  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1157  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1158  ret <2 x i64> %shuffle
1159}
1160
; Replaces the low lane of %b with the scalar double %a: mask <0, 3> yields
; <%a, %b[1]>. The SSE runs expect a movsd that merges into xmm1 plus a movapd
; copy back to xmm0; the AVX runs expect a single three-operand vmovsd.
1161define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1162; SSE-LABEL: insert_reg_lo_v2f64:
1163; SSE:       # BB#0:
1164; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1165; SSE-NEXT:    movapd %xmm1, %xmm0
1166; SSE-NEXT:    retq
1167;
1168; AVX-LABEL: insert_reg_lo_v2f64:
1169; AVX:       # BB#0:
1170; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1171; AVX-NEXT:    retq
1172  %v = insertelement <2 x double> undef, double %a, i32 0
1173  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1174  ret <2 x double> %shuffle
1175}
1176
; Replaces the low lane of %b with a double loaded from %ptr: mask <0, 3>
; yields <load, %b[1]>. The checks expect the load to fold into a single
; movlpd (vmovlpd on the AVX runs).
1177define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1178; SSE-LABEL: insert_mem_lo_v2f64:
1179; SSE:       # BB#0:
1180; SSE-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1181; SSE-NEXT:    retq
1182;
1183; AVX-LABEL: insert_mem_lo_v2f64:
1184; AVX:       # BB#0:
1185; AVX-NEXT:    vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1186; AVX-NEXT:    retq
1187  %a = load double, double* %ptr
1188  %v = insertelement <2 x double> undef, double %a, i32 0
1189  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1190  ret <2 x double> %shuffle
1191}
1192
; Replaces the high lane of %b with the scalar double %a: mask <2, 0> yields
; <%b[0], %a>. The SSE runs expect an unpcklpd into xmm1 plus a movapd copy
; back to xmm0; the AVX runs expect a single three-operand vunpcklpd.
1193define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1194; SSE-LABEL: insert_reg_hi_v2f64:
1195; SSE:       # BB#0:
1196; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1197; SSE-NEXT:    movapd %xmm1, %xmm0
1198; SSE-NEXT:    retq
1199;
1200; AVX-LABEL: insert_reg_hi_v2f64:
1201; AVX:       # BB#0:
1202; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1203; AVX-NEXT:    retq
1204  %v = insertelement <2 x double> undef, double %a, i32 0
1205  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1206  ret <2 x double> %shuffle
1207}
1208
; Replaces the high lane of %b with a double loaded from %ptr: mask <2, 0>
; yields <%b[0], load>. The checks expect the load to fold into a single
; movhpd (vmovhpd on the AVX runs).
1209define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1210; SSE-LABEL: insert_mem_hi_v2f64:
1211; SSE:       # BB#0:
1212; SSE-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1213; SSE-NEXT:    retq
1214;
1215; AVX-LABEL: insert_mem_hi_v2f64:
1216; AVX:       # BB#0:
1217; AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1218; AVX-NEXT:    retq
1219  %a = load double, double* %ptr
1220  %v = insertelement <2 x double> undef, double %a, i32 0
1221  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1222  ret <2 x double> %shuffle
1223}
1224
; Splats the scalar double %a into both lanes (insert into lane 0, then mask
; <0, 0>). The SSE2 run expects movlhps; the SSE3, SSSE3 and SSE4.1 runs expect
; movddup, and the AVX runs expect vmovddup.
1225define <2 x double> @insert_dup_reg_v2f64(double %a) {
1226; SSE2-LABEL: insert_dup_reg_v2f64:
1227; SSE2:       # BB#0:
1228; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1229; SSE2-NEXT:    retq
1230;
1231; SSE3-LABEL: insert_dup_reg_v2f64:
1232; SSE3:       # BB#0:
1233; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1234; SSE3-NEXT:    retq
1235;
1236; SSSE3-LABEL: insert_dup_reg_v2f64:
1237; SSSE3:       # BB#0:
1238; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1239; SSSE3-NEXT:    retq
1240;
1241; SSE41-LABEL: insert_dup_reg_v2f64:
1242; SSE41:       # BB#0:
1243; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1244; SSE41-NEXT:    retq
1245;
1246; AVX-LABEL: insert_dup_reg_v2f64:
1247; AVX:       # BB#0:
1248; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1249; AVX-NEXT:    retq
1250  %v = insertelement <2 x double> undef, double %a, i32 0
1251  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1252  ret <2 x double> %shuffle
1253}
1254
; Splats a double loaded from %ptr into both lanes (insert into lane 0, then
; mask <0, 0>). The SSE2 run expects a movsd load followed by movlhps; every
; other run expects the load folded into a single movddup (vmovddup on the AVX
; runs) with a memory operand.
1255define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1256; SSE2-LABEL: insert_dup_mem_v2f64:
1257; SSE2:       # BB#0:
1258; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1259; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1260; SSE2-NEXT:    retq
1261;
1262; SSE3-LABEL: insert_dup_mem_v2f64:
1263; SSE3:       # BB#0:
1264; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1265; SSE3-NEXT:    retq
1266;
1267; SSSE3-LABEL: insert_dup_mem_v2f64:
1268; SSSE3:       # BB#0:
1269; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1270; SSSE3-NEXT:    retq
1271;
1272; SSE41-LABEL: insert_dup_mem_v2f64:
1273; SSE41:       # BB#0:
1274; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1275; SSE41-NEXT:    retq
1276;
1277; AVX-LABEL: insert_dup_mem_v2f64:
1278; AVX:       # BB#0:
1279; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1280; AVX-NEXT:    retq
1281  %a = load double, double* %ptr
1282  %v = insertelement <2 x double> undef, double %a, i32 0
1283  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1284  ret <2 x double> %shuffle
1285}
1286
; Loads a full <2 x double> from %ptr and splats its lane 0 (mask <0, 0>).
; The SSE2 run expects a full movaps load followed by movlhps; every other
; run expects a single movddup (vmovddup on the AVX runs) with a memory
; operand.
1287define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1288; SSE2-LABEL: insert_dup_mem128_v2f64:
1289; SSE2:       # BB#0:
1290; SSE2-NEXT:    movaps (%rdi), %xmm0
1291; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1292; SSE2-NEXT:    retq
1293;
1294; SSE3-LABEL: insert_dup_mem128_v2f64:
1295; SSE3:       # BB#0:
1296; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1297; SSE3-NEXT:    retq
1298;
1299; SSSE3-LABEL: insert_dup_mem128_v2f64:
1300; SSSE3:       # BB#0:
1301; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1302; SSSE3-NEXT:    retq
1303;
1304; SSE41-LABEL: insert_dup_mem128_v2f64:
1305; SSE41:       # BB#0:
1306; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1307; SSE41-NEXT:    retq
1308;
1309; AVX-LABEL: insert_dup_mem128_v2f64:
1310; AVX:       # BB#0:
1311; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1312; AVX-NEXT:    retq
1313  %v = load  <2 x double>,  <2 x double>* %ptr
1314  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1315  ret <2 x double> %shuffle
1316}
1317
1318
; Splats an i64 loaded from %ptr (the load is marked align 1) into both lanes;
; the zeroinitializer shuffle mask selects lane 0 twice. The SSE and AVX1 runs
; expect a movq load followed by a pshufd of [0,1,0,1]; the AVX2 and AVX512VL
; runs expect a single vpbroadcastq from memory.
1319define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1320; SSE-LABEL: insert_dup_mem_v2i64:
1321; SSE:       # BB#0:
1322; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1323; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1324; SSE-NEXT:    retq
1325;
1326; AVX1-LABEL: insert_dup_mem_v2i64:
1327; AVX1:       # BB#0:
1328; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
1329; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1330; AVX1-NEXT:    retq
1331;
1332; AVX2-LABEL: insert_dup_mem_v2i64:
1333; AVX2:       # BB#0:
1334; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
1335; AVX2-NEXT:    retq
1336;
1337; AVX512VL-LABEL: insert_dup_mem_v2i64:
1338; AVX512VL:       # BB#0:
1339; AVX512VL-NEXT:    vpbroadcastq (%rdi), %xmm0
1340; AVX512VL-NEXT:    retq
1341  %tmp = load i64, i64* %ptr, align 1
1342  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1343  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
1344  ret <2 x i64> %tmp2
1345}
1346
; Loads a <2 x double> from %ptr and swaps its two lanes (mask <1, 0>). The SSE
; runs expect a movapd load followed by shufpd; the AVX runs expect the load
; folded into a single vpermilpd with a memory operand.
1347define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1348; SSE-LABEL: shuffle_mem_v2f64_10:
1349; SSE:       # BB#0:
1350; SSE-NEXT:    movapd (%rdi), %xmm0
1351; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
1352; SSE-NEXT:    retq
1353;
1354; AVX-LABEL: shuffle_mem_v2f64_10:
1355; AVX:       # BB#0:
1356; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
1357; AVX-NEXT:    retq
1358
1359  %a = load <2 x double>, <2 x double>* %ptr
1360  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1361  ret <2 x double> %shuffle
1362}
1363