• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL
9
; Mask <0,0>: both result lanes take element 0 of %a; %b is not referenced by the mask.
10define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
11; SSE-LABEL: shuffle_v2i64_00:
12; SSE:       # %bb.0:
13; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
14; SSE-NEXT:    retq
15;
16; AVX1-LABEL: shuffle_v2i64_00:
17; AVX1:       # %bb.0:
18; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
19; AVX1-NEXT:    retq
20;
21; AVX2-LABEL: shuffle_v2i64_00:
22; AVX2:       # %bb.0:
23; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
24; AVX2-NEXT:    retq
25;
26; AVX512VL-LABEL: shuffle_v2i64_00:
27; AVX512VL:       # %bb.0:
28; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
29; AVX512VL-NEXT:    retq
30  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
31  ret <2 x i64> %shuffle
32}
; Mask <1,0>: swaps the two elements of %a; %b is not referenced by the mask.
33define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
34; SSE-LABEL: shuffle_v2i64_10:
35; SSE:       # %bb.0:
36; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
37; SSE-NEXT:    retq
38;
39; AVX-LABEL: shuffle_v2i64_10:
40; AVX:       # %bb.0:
41; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
42; AVX-NEXT:    retq
43  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
44  ret <2 x i64> %shuffle
45}
; Mask <1,1>: both result lanes take element 1 of %a; %b is not referenced by the mask.
46define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
47; SSE-LABEL: shuffle_v2i64_11:
48; SSE:       # %bb.0:
49; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
50; SSE-NEXT:    retq
51;
52; AVX-LABEL: shuffle_v2i64_11:
53; AVX:       # %bb.0:
54; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
55; AVX-NEXT:    retq
56  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
57  ret <2 x i64> %shuffle
58}
; Mask <2,2>: both result lanes take element 0 of %b (mask index 2); %a is not referenced by the mask.
59define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
60; SSE-LABEL: shuffle_v2i64_22:
61; SSE:       # %bb.0:
62; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
63; SSE-NEXT:    retq
64;
65; AVX1-LABEL: shuffle_v2i64_22:
66; AVX1:       # %bb.0:
67; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
68; AVX1-NEXT:    retq
69;
70; AVX2-LABEL: shuffle_v2i64_22:
71; AVX2:       # %bb.0:
72; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
73; AVX2-NEXT:    retq
74;
75; AVX512VL-LABEL: shuffle_v2i64_22:
76; AVX512VL:       # %bb.0:
77; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
78; AVX512VL-NEXT:    retq
79  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
80  ret <2 x i64> %shuffle
81}
; Mask <3,2>: swaps the two elements of %b; %a is not referenced by the mask.
82define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
83; SSE-LABEL: shuffle_v2i64_32:
84; SSE:       # %bb.0:
85; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
86; SSE-NEXT:    retq
87;
88; AVX-LABEL: shuffle_v2i64_32:
89; AVX:       # %bb.0:
90; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
91; AVX-NEXT:    retq
92  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
93  ret <2 x i64> %shuffle
94}
; Mask <3,3>: both result lanes take element 1 of %b (mask index 3); %a is not referenced by the mask.
95define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
96; SSE-LABEL: shuffle_v2i64_33:
97; SSE:       # %bb.0:
98; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
99; SSE-NEXT:    retq
100;
101; AVX-LABEL: shuffle_v2i64_33:
102; AVX:       # %bb.0:
103; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
104; AVX-NEXT:    retq
105  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
106  ret <2 x i64> %shuffle
107}
108
; Mask <0,0> on doubles: both result lanes take element 0 of %a; %b is not referenced by the mask.
; The checks differ per feature level (movlhps for the baseline run, movddup once SSE3 is enabled).
109define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
110; SSE2-LABEL: shuffle_v2f64_00:
111; SSE2:       # %bb.0:
112; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
113; SSE2-NEXT:    retq
114;
115; SSE3-LABEL: shuffle_v2f64_00:
116; SSE3:       # %bb.0:
117; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
118; SSE3-NEXT:    retq
119;
120; SSSE3-LABEL: shuffle_v2f64_00:
121; SSSE3:       # %bb.0:
122; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
123; SSSE3-NEXT:    retq
124;
125; SSE41-LABEL: shuffle_v2f64_00:
126; SSE41:       # %bb.0:
127; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
128; SSE41-NEXT:    retq
129;
130; AVX-LABEL: shuffle_v2f64_00:
131; AVX:       # %bb.0:
132; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
133; AVX-NEXT:    retq
134  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
135  ret <2 x double> %shuffle
136}
; Mask <1,0> on doubles: swaps the two elements of %a; %b is not referenced by the mask.
137define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
138; SSE-LABEL: shuffle_v2f64_10:
139; SSE:       # %bb.0:
140; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
141; SSE-NEXT:    retq
142;
143; AVX-LABEL: shuffle_v2f64_10:
144; AVX:       # %bb.0:
145; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
146; AVX-NEXT:    retq
147
148  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
149  ret <2 x double> %shuffle
150}
; Mask <1,1> on doubles: both result lanes take element 1 of %a; %b is not referenced by the mask.
151define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
152; SSE-LABEL: shuffle_v2f64_11:
153; SSE:       # %bb.0:
154; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
155; SSE-NEXT:    retq
156;
157; AVX-LABEL: shuffle_v2f64_11:
158; AVX:       # %bb.0:
159; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
160; AVX-NEXT:    retq
161  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
162  ret <2 x double> %shuffle
163}
; Mask <2,2> on doubles: both result lanes take element 0 of %b; %a is not referenced by the mask.
; The baseline checks expect a register copy from xmm1 before the movlhps.
164define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
165; SSE2-LABEL: shuffle_v2f64_22:
166; SSE2:       # %bb.0:
167; SSE2-NEXT:    movaps %xmm1, %xmm0
168; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
169; SSE2-NEXT:    retq
170;
171; SSE3-LABEL: shuffle_v2f64_22:
172; SSE3:       # %bb.0:
173; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
174; SSE3-NEXT:    retq
175;
176; SSSE3-LABEL: shuffle_v2f64_22:
177; SSSE3:       # %bb.0:
178; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
179; SSSE3-NEXT:    retq
180;
181; SSE41-LABEL: shuffle_v2f64_22:
182; SSE41:       # %bb.0:
183; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
184; SSE41-NEXT:    retq
185;
186; AVX-LABEL: shuffle_v2f64_22:
187; AVX:       # %bb.0:
188; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
189; AVX-NEXT:    retq
190  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
191  ret <2 x double> %shuffle
192}
; Mask <3,2> on doubles: swaps the two elements of %b; %a is not referenced by the mask.
193define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
194; SSE-LABEL: shuffle_v2f64_32:
195; SSE:       # %bb.0:
196; SSE-NEXT:    movaps %xmm1, %xmm0
197; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
198; SSE-NEXT:    retq
199;
200; AVX-LABEL: shuffle_v2f64_32:
201; AVX:       # %bb.0:
202; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
203; AVX-NEXT:    retq
204
205  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
206  ret <2 x double> %shuffle
207}
; Mask <3,3> on doubles: both result lanes take element 1 of %b; %a is not referenced by the mask.
208define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
209; SSE-LABEL: shuffle_v2f64_33:
210; SSE:       # %bb.0:
211; SSE-NEXT:    movaps %xmm1, %xmm0
212; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
213; SSE-NEXT:    retq
214;
215; AVX-LABEL: shuffle_v2f64_33:
216; AVX:       # %bb.0:
217; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
218; AVX-NEXT:    retq
219  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
220  ret <2 x double> %shuffle
221}
; Mask <0,3> on doubles: lane 0 = %a[0], lane 1 = %b[1] (each lane stays in place, so this is a blend).
; The checks expect shufps below SSE4.1 and a blendps/vblendps from SSE4.1 upward.
222define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
223; SSE2-LABEL: shuffle_v2f64_03:
224; SSE2:       # %bb.0:
225; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
226; SSE2-NEXT:    retq
227;
228; SSE3-LABEL: shuffle_v2f64_03:
229; SSE3:       # %bb.0:
230; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
231; SSE3-NEXT:    retq
232;
233; SSSE3-LABEL: shuffle_v2f64_03:
234; SSSE3:       # %bb.0:
235; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
236; SSSE3-NEXT:    retq
237;
238; SSE41-LABEL: shuffle_v2f64_03:
239; SSE41:       # %bb.0:
240; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
241; SSE41-NEXT:    retq
242;
243; AVX-LABEL: shuffle_v2f64_03:
244; AVX:       # %bb.0:
245; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
246; AVX-NEXT:    retq
247  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
248  ret <2 x double> %shuffle
249}
; Mask <2,1> on doubles: lane 0 = %b[0], lane 1 = %a[1] (each lane stays in place, so this is a blend).
; The checks expect movsd below SSE4.1 and a blendps/vblendps from SSE4.1 upward.
250define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
251; SSE2-LABEL: shuffle_v2f64_21:
252; SSE2:       # %bb.0:
253; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
254; SSE2-NEXT:    retq
255;
256; SSE3-LABEL: shuffle_v2f64_21:
257; SSE3:       # %bb.0:
258; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
259; SSE3-NEXT:    retq
260;
261; SSSE3-LABEL: shuffle_v2f64_21:
262; SSSE3:       # %bb.0:
263; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
264; SSSE3-NEXT:    retq
265;
266; SSE41-LABEL: shuffle_v2f64_21:
267; SSE41:       # %bb.0:
268; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
269; SSE41-NEXT:    retq
270;
271; AVX-LABEL: shuffle_v2f64_21:
272; AVX:       # %bb.0:
273; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
274; AVX-NEXT:    retq
275  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
276  ret <2 x double> %shuffle
277}
; Mask <undef,2> on doubles: lane 0 is undefined, lane 1 = %b[0].
; Because lane 0 is free, the checks from SSE3 upward expect a plain movddup of %b's low element.
278define <2 x double> @shuffle_v2f64_u2(<2 x double> %a, <2 x double> %b) {
279; SSE2-LABEL: shuffle_v2f64_u2:
280; SSE2:       # %bb.0:
281; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
282; SSE2-NEXT:    retq
283;
284; SSE3-LABEL: shuffle_v2f64_u2:
285; SSE3:       # %bb.0:
286; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
287; SSE3-NEXT:    retq
288;
289; SSSE3-LABEL: shuffle_v2f64_u2:
290; SSSE3:       # %bb.0:
291; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
292; SSSE3-NEXT:    retq
293;
294; SSE41-LABEL: shuffle_v2f64_u2:
295; SSE41:       # %bb.0:
296; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
297; SSE41-NEXT:    retq
298;
299; AVX-LABEL: shuffle_v2f64_u2:
300; AVX:       # %bb.0:
301; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
302; AVX-NEXT:    retq
303  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 undef, i32 2>
304  ret <2 x double> %shuffle
305}
; Mask <3,undef> on doubles: lane 0 = %b[1], lane 1 is undefined; %a is not referenced by the mask.
306define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) {
307; SSE-LABEL: shuffle_v2f64_3u:
308; SSE:       # %bb.0:
309; SSE-NEXT:    movaps %xmm1, %xmm0
310; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
311; SSE-NEXT:    retq
312;
313; AVX-LABEL: shuffle_v2f64_3u:
314; AVX:       # %bb.0:
315; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
316; AVX-NEXT:    retq
317  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 undef>
318  ret <2 x double> %shuffle
319}
320
; Mask <0,2>: lane 0 = %a[0], lane 1 = %b[0] (interleaves the low elements of both inputs).
321define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
322; SSE-LABEL: shuffle_v2i64_02:
323; SSE:       # %bb.0:
324; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
325; SSE-NEXT:    retq
326;
327; AVX-LABEL: shuffle_v2i64_02:
328; AVX:       # %bb.0:
329; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
330; AVX-NEXT:    retq
331  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
332  ret <2 x i64> %shuffle
333}
; Same <0,2> mask as shuffle_v2i64_02, but with an unused leading %nonce argument. Per the checks the
; inputs then sit in xmm1/xmm2 while the result goes to xmm0, so the two-operand SSE form needs a movaps
; copy first and the three-operand AVX form does not.
334define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
335; SSE-LABEL: shuffle_v2i64_02_copy:
336; SSE:       # %bb.0:
337; SSE-NEXT:    movaps %xmm1, %xmm0
338; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
339; SSE-NEXT:    retq
340;
341; AVX-LABEL: shuffle_v2i64_02_copy:
342; AVX:       # %bb.0:
343; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
344; AVX-NEXT:    retq
345  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
346  ret <2 x i64> %shuffle
347}
; Mask <0,3>: lane 0 = %a[0], lane 1 = %b[1] (each lane stays in place, so this is a blend).
; The checks expect shufps below SSE4.1 and a blendps/vblendps from SSE4.1 upward.
348define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
349; SSE2-LABEL: shuffle_v2i64_03:
350; SSE2:       # %bb.0:
351; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
352; SSE2-NEXT:    retq
353;
354; SSE3-LABEL: shuffle_v2i64_03:
355; SSE3:       # %bb.0:
356; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
357; SSE3-NEXT:    retq
358;
359; SSSE3-LABEL: shuffle_v2i64_03:
360; SSSE3:       # %bb.0:
361; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
362; SSSE3-NEXT:    retq
363;
364; SSE41-LABEL: shuffle_v2i64_03:
365; SSE41:       # %bb.0:
366; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
367; SSE41-NEXT:    retq
368;
369; AVX-LABEL: shuffle_v2i64_03:
370; AVX:       # %bb.0:
371; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
372; AVX-NEXT:    retq
373  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
374  ret <2 x i64> %shuffle
375}
; Same <0,3> mask as shuffle_v2i64_03, but with an unused leading %nonce argument. Per the checks the
; inputs sit in xmm1/xmm2, so every non-AVX run expects a movaps copy into xmm0 before the shuffle/blend.
376define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
377; SSE2-LABEL: shuffle_v2i64_03_copy:
378; SSE2:       # %bb.0:
379; SSE2-NEXT:    movaps %xmm1, %xmm0
380; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
381; SSE2-NEXT:    retq
382;
383; SSE3-LABEL: shuffle_v2i64_03_copy:
384; SSE3:       # %bb.0:
385; SSE3-NEXT:    movaps %xmm1, %xmm0
386; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
387; SSE3-NEXT:    retq
388;
389; SSSE3-LABEL: shuffle_v2i64_03_copy:
390; SSSE3:       # %bb.0:
391; SSSE3-NEXT:    movaps %xmm1, %xmm0
392; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
393; SSSE3-NEXT:    retq
394;
395; SSE41-LABEL: shuffle_v2i64_03_copy:
396; SSE41:       # %bb.0:
397; SSE41-NEXT:    movaps %xmm1, %xmm0
398; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
399; SSE41-NEXT:    retq
400;
401; AVX-LABEL: shuffle_v2i64_03_copy:
402; AVX:       # %bb.0:
403; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
404; AVX-NEXT:    retq
405  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
406  ret <2 x i64> %shuffle
407}
; Mask <1,2>: lane 0 = %a[1], lane 1 = %b[0] (the middle 128 bits of the %a:%b concatenation).
; The checks expect shufps without SSSE3 and a byte-wise palignr/vpalignr once SSSE3 is available.
408define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
409; SSE2-LABEL: shuffle_v2i64_12:
410; SSE2:       # %bb.0:
411; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
412; SSE2-NEXT:    retq
413;
414; SSE3-LABEL: shuffle_v2i64_12:
415; SSE3:       # %bb.0:
416; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
417; SSE3-NEXT:    retq
418;
419; SSSE3-LABEL: shuffle_v2i64_12:
420; SSSE3:       # %bb.0:
421; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
422; SSSE3-NEXT:    movdqa %xmm1, %xmm0
423; SSSE3-NEXT:    retq
424;
425; SSE41-LABEL: shuffle_v2i64_12:
426; SSE41:       # %bb.0:
427; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
428; SSE41-NEXT:    movdqa %xmm1, %xmm0
429; SSE41-NEXT:    retq
430;
431; AVX-LABEL: shuffle_v2i64_12:
432; AVX:       # %bb.0:
433; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
434; AVX-NEXT:    retq
435  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
436  ret <2 x i64> %shuffle
437}
; Same <1,2> mask as shuffle_v2i64_12, but with an unused leading %nonce argument. Per the checks the
; inputs sit in xmm1/xmm2; the non-AVX runs expect a leading register copy into xmm0 (from xmm1 for the
; shufps form, from xmm2 for the palignr form).
438define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
439; SSE2-LABEL: shuffle_v2i64_12_copy:
440; SSE2:       # %bb.0:
441; SSE2-NEXT:    movaps %xmm1, %xmm0
442; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
443; SSE2-NEXT:    retq
444;
445; SSE3-LABEL: shuffle_v2i64_12_copy:
446; SSE3:       # %bb.0:
447; SSE3-NEXT:    movaps %xmm1, %xmm0
448; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
449; SSE3-NEXT:    retq
450;
451; SSSE3-LABEL: shuffle_v2i64_12_copy:
452; SSSE3:       # %bb.0:
453; SSSE3-NEXT:    movdqa %xmm2, %xmm0
454; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
455; SSSE3-NEXT:    retq
456;
457; SSE41-LABEL: shuffle_v2i64_12_copy:
458; SSE41:       # %bb.0:
459; SSE41-NEXT:    movdqa %xmm2, %xmm0
460; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
461; SSE41-NEXT:    retq
462;
463; AVX-LABEL: shuffle_v2i64_12_copy:
464; AVX:       # %bb.0:
465; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
466; AVX-NEXT:    retq
467  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
468  ret <2 x i64> %shuffle
469}
; Mask <1,3>: lane 0 = %a[1], lane 1 = %b[1] (interleaves the high elements of both inputs).
470define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
471; SSE-LABEL: shuffle_v2i64_13:
472; SSE:       # %bb.0:
473; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
474; SSE-NEXT:    retq
475;
476; AVX-LABEL: shuffle_v2i64_13:
477; AVX:       # %bb.0:
478; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
479; AVX-NEXT:    retq
480  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
481  ret <2 x i64> %shuffle
482}
; Same <1,3> mask as shuffle_v2i64_13, but with an unused leading %nonce argument. Per the checks the
; inputs sit in xmm1/xmm2, so the SSE form expects a movaps copy into xmm0 before the unpckhpd.
483define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
484; SSE-LABEL: shuffle_v2i64_13_copy:
485; SSE:       # %bb.0:
486; SSE-NEXT:    movaps %xmm1, %xmm0
487; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
488; SSE-NEXT:    retq
489;
490; AVX-LABEL: shuffle_v2i64_13_copy:
491; AVX:       # %bb.0:
492; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
493; AVX-NEXT:    retq
494  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
495  ret <2 x i64> %shuffle
496}
; Mask <2,0>: lane 0 = %b[0], lane 1 = %a[0] (shuffle_v2i64_02 with the inputs commuted).
; The SSE checks build the result in xmm1 and then copy it to xmm0.
497define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
498; SSE-LABEL: shuffle_v2i64_20:
499; SSE:       # %bb.0:
500; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
501; SSE-NEXT:    movaps %xmm1, %xmm0
502; SSE-NEXT:    retq
503;
504; AVX-LABEL: shuffle_v2i64_20:
505; AVX:       # %bb.0:
506; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
507; AVX-NEXT:    retq
508  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
509  ret <2 x i64> %shuffle
510}
; Same <2,0> mask as shuffle_v2i64_20, but with an unused leading %nonce argument. Per the checks the
; inputs sit in xmm1/xmm2, so the SSE form expects %b (xmm2) to be copied into xmm0 before the movlhps.
511define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
512; SSE-LABEL: shuffle_v2i64_20_copy:
513; SSE:       # %bb.0:
514; SSE-NEXT:    movaps %xmm2, %xmm0
515; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
516; SSE-NEXT:    retq
517;
518; AVX-LABEL: shuffle_v2i64_20_copy:
519; AVX:       # %bb.0:
520; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
521; AVX-NEXT:    retq
522  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
523  ret <2 x i64> %shuffle
524}
; Mask <2,1>: lane 0 = %b[0], lane 1 = %a[1] (each lane stays in place, so this is a blend).
; The checks expect movsd below SSE4.1 and a blendps/vblendps from SSE4.1 upward.
525define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
526; SSE2-LABEL: shuffle_v2i64_21:
527; SSE2:       # %bb.0:
528; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
529; SSE2-NEXT:    retq
530;
531; SSE3-LABEL: shuffle_v2i64_21:
532; SSE3:       # %bb.0:
533; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
534; SSE3-NEXT:    retq
535;
536; SSSE3-LABEL: shuffle_v2i64_21:
537; SSSE3:       # %bb.0:
538; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
539; SSSE3-NEXT:    retq
540;
541; SSE41-LABEL: shuffle_v2i64_21:
542; SSE41:       # %bb.0:
543; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
544; SSE41-NEXT:    retq
545;
546; AVX-LABEL: shuffle_v2i64_21:
547; AVX:       # %bb.0:
548; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
549; AVX-NEXT:    retq
550  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
551  ret <2 x i64> %shuffle
552}
; Same <2,1> mask as shuffle_v2i64_21, but with an unused leading %nonce argument. Per the checks the
; inputs sit in xmm1/xmm2, so every non-AVX run expects %a (xmm1) to be copied into xmm0 first
; (movapd ahead of movsd, movaps ahead of blendps).
553define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
554; SSE2-LABEL: shuffle_v2i64_21_copy:
555; SSE2:       # %bb.0:
556; SSE2-NEXT:    movapd %xmm1, %xmm0
557; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
558; SSE2-NEXT:    retq
559;
560; SSE3-LABEL: shuffle_v2i64_21_copy:
561; SSE3:       # %bb.0:
562; SSE3-NEXT:    movapd %xmm1, %xmm0
563; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
564; SSE3-NEXT:    retq
565;
566; SSSE3-LABEL: shuffle_v2i64_21_copy:
567; SSSE3:       # %bb.0:
568; SSSE3-NEXT:    movapd %xmm1, %xmm0
569; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
570; SSSE3-NEXT:    retq
571;
572; SSE41-LABEL: shuffle_v2i64_21_copy:
573; SSE41:       # %bb.0:
574; SSE41-NEXT:    movaps %xmm1, %xmm0
575; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
576; SSE41-NEXT:    retq
577;
578; AVX-LABEL: shuffle_v2i64_21_copy:
579; AVX:       # %bb.0:
580; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
581; AVX-NEXT:    retq
582  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
583  ret <2 x i64> %shuffle
584}
; Mask <3,0>: lane 0 = %b[1], lane 1 = %a[0] (shuffle_v2i64_12 with the inputs commuted).
; The checks expect shufps plus a copy without SSSE3, and a single palignr/vpalignr with it.
585define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
586; SSE2-LABEL: shuffle_v2i64_30:
587; SSE2:       # %bb.0:
588; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
589; SSE2-NEXT:    movaps %xmm1, %xmm0
590; SSE2-NEXT:    retq
591;
592; SSE3-LABEL: shuffle_v2i64_30:
593; SSE3:       # %bb.0:
594; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
595; SSE3-NEXT:    movaps %xmm1, %xmm0
596; SSE3-NEXT:    retq
597;
598; SSSE3-LABEL: shuffle_v2i64_30:
599; SSSE3:       # %bb.0:
600; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
601; SSSE3-NEXT:    retq
602;
603; SSE41-LABEL: shuffle_v2i64_30:
604; SSE41:       # %bb.0:
605; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
606; SSE41-NEXT:    retq
607;
608; AVX-LABEL: shuffle_v2i64_30:
609; AVX:       # %bb.0:
610; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
611; AVX-NEXT:    retq
612  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
613  ret <2 x i64> %shuffle
614}
; Same <3,0> mask as shuffle_v2i64_30, but with an unused leading %nonce argument. Per the checks the
; inputs sit in xmm1/xmm2; the non-AVX runs expect a leading register copy into xmm0 (from xmm2 for the
; shufps form, from xmm1 for the palignr form).
615define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
616; SSE2-LABEL: shuffle_v2i64_30_copy:
617; SSE2:       # %bb.0:
618; SSE2-NEXT:    movaps %xmm2, %xmm0
619; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
620; SSE2-NEXT:    retq
621;
622; SSE3-LABEL: shuffle_v2i64_30_copy:
623; SSE3:       # %bb.0:
624; SSE3-NEXT:    movaps %xmm2, %xmm0
625; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
626; SSE3-NEXT:    retq
627;
628; SSSE3-LABEL: shuffle_v2i64_30_copy:
629; SSSE3:       # %bb.0:
630; SSSE3-NEXT:    movdqa %xmm1, %xmm0
631; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
632; SSSE3-NEXT:    retq
633;
634; SSE41-LABEL: shuffle_v2i64_30_copy:
635; SSE41:       # %bb.0:
636; SSE41-NEXT:    movdqa %xmm1, %xmm0
637; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
638; SSE41-NEXT:    retq
639;
640; AVX-LABEL: shuffle_v2i64_30_copy:
641; AVX:       # %bb.0:
642; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
643; AVX-NEXT:    retq
644  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
645  ret <2 x i64> %shuffle
646}
; Mask <3,1>: lane 0 = %b[1], lane 1 = %a[1] (shuffle_v2i64_13 with the inputs commuted).
; The SSE checks build the result in xmm1 and then copy it to xmm0.
647define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
648; SSE-LABEL: shuffle_v2i64_31:
649; SSE:       # %bb.0:
650; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
651; SSE-NEXT:    movaps %xmm1, %xmm0
652; SSE-NEXT:    retq
653;
654; AVX-LABEL: shuffle_v2i64_31:
655; AVX:       # %bb.0:
656; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
657; AVX-NEXT:    retq
658  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
659  ret <2 x i64> %shuffle
660}
; Same <3,1> mask as shuffle_v2i64_31, but with an unused leading %nonce argument. Per the checks the
; inputs sit in xmm1/xmm2, so the SSE form expects %b (xmm2) to be copied into xmm0 before the unpckhpd.
661define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
662; SSE-LABEL: shuffle_v2i64_31_copy:
663; SSE:       # %bb.0:
664; SSE-NEXT:    movaps %xmm2, %xmm0
665; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
666; SSE-NEXT:    retq
667;
668; AVX-LABEL: shuffle_v2i64_31_copy:
669; AVX:       # %bb.0:
670; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
671; AVX-NEXT:    retq
672  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
673  ret <2 x i64> %shuffle
674}
675
; Mask <0,3> against zeroinitializer: lane 0 = %a[0], lane 1 = zero.
; The checks expect this to fold into a single zero-extending movq/vmovq.
676define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
677; SSE-LABEL: shuffle_v2i64_0z:
678; SSE:       # %bb.0:
679; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
680; SSE-NEXT:    retq
681;
682; AVX-LABEL: shuffle_v2i64_0z:
683; AVX:       # %bb.0:
684; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
685; AVX-NEXT:    retq
686  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
687  ret <2 x i64> %shuffle
688}
689
; Mask <1,3> against zeroinitializer: lane 0 = %a[1], lane 1 = zero.
; The checks expect a whole-register byte shift right (psrldq/vpsrldq) that shifts zeros in.
690define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
691; SSE-LABEL: shuffle_v2i64_1z:
692; SSE:       # %bb.0:
693; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
694; SSE-NEXT:    retq
695;
696; AVX-LABEL: shuffle_v2i64_1z:
697; AVX:       # %bb.0:
698; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
699; AVX-NEXT:    retq
700  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
701  ret <2 x i64> %shuffle
702}
703
; Mask <2,0> against zeroinitializer: lane 0 = zero, lane 1 = %a[0].
; The checks expect a whole-register byte shift left (pslldq/vpslldq) that shifts zeros in.
704define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
705; SSE-LABEL: shuffle_v2i64_z0:
706; SSE:       # %bb.0:
707; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
708; SSE-NEXT:    retq
709;
710; AVX-LABEL: shuffle_v2i64_z0:
711; AVX:       # %bb.0:
712; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
713; AVX-NEXT:    retq
714  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
715  ret <2 x i64> %shuffle
716}
717
; Mask <2,1> against zeroinitializer: lane 0 = zero, lane 1 = %a[1].
; All checks materialise a zero register first; below SSE4.1 they combine it with unpckhpd plus a copy,
; from SSE4.1 upward with a single blendps/vblendps.
718define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
719; SSE2-LABEL: shuffle_v2i64_z1:
720; SSE2:       # %bb.0:
721; SSE2-NEXT:    xorps %xmm1, %xmm1
722; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
723; SSE2-NEXT:    movaps %xmm1, %xmm0
724; SSE2-NEXT:    retq
725;
726; SSE3-LABEL: shuffle_v2i64_z1:
727; SSE3:       # %bb.0:
728; SSE3-NEXT:    xorps %xmm1, %xmm1
729; SSE3-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
730; SSE3-NEXT:    movaps %xmm1, %xmm0
731; SSE3-NEXT:    retq
732;
733; SSSE3-LABEL: shuffle_v2i64_z1:
734; SSSE3:       # %bb.0:
735; SSSE3-NEXT:    xorps %xmm1, %xmm1
736; SSSE3-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
737; SSSE3-NEXT:    movaps %xmm1, %xmm0
738; SSSE3-NEXT:    retq
739;
740; SSE41-LABEL: shuffle_v2i64_z1:
741; SSE41:       # %bb.0:
742; SSE41-NEXT:    xorps %xmm1, %xmm1
743; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
744; SSE41-NEXT:    retq
745;
746; AVX-LABEL: shuffle_v2i64_z1:
747; AVX:       # %bb.0:
748; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
749; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
750; AVX-NEXT:    retq
751  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
752  ret <2 x i64> %shuffle
753}
754
; Mask <0,3> on doubles against zeroinitializer: lane 0 = %a[0], lane 1 = zero.
; The checks expect this to fold into a single zero-extending movq/vmovq.
755define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
756; SSE-LABEL: shuffle_v2f64_0z:
757; SSE:       # %bb.0:
758; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
759; SSE-NEXT:    retq
760;
761; AVX-LABEL: shuffle_v2f64_0z:
762; AVX:       # %bb.0:
763; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
764; AVX-NEXT:    retq
765  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
766  ret <2 x double> %shuffle
767}
768
; Mask <1,3> on doubles against zeroinitializer: lane 0 = %a[1], lane 1 = zero.
; The checks materialise a zero register and unpack the high elements against it.
769define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
770; SSE-LABEL: shuffle_v2f64_1z:
771; SSE:       # %bb.0:
772; SSE-NEXT:    xorps %xmm1, %xmm1
773; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
774; SSE-NEXT:    retq
775;
776; AVX-LABEL: shuffle_v2f64_1z:
777; AVX:       # %bb.0:
778; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
779; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
780; AVX-NEXT:    retq
781  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
782  ret <2 x double> %shuffle
783}
784
; Mask <2,0> on doubles against zeroinitializer: lane 0 = zero, lane 1 = %a[0].
; The checks materialise a zero register and use movlhps/vmovlhps; the SSE form then copies xmm1 to xmm0.
785define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
786; SSE-LABEL: shuffle_v2f64_z0:
787; SSE:       # %bb.0:
788; SSE-NEXT:    xorps %xmm1, %xmm1
789; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
790; SSE-NEXT:    movaps %xmm1, %xmm0
791; SSE-NEXT:    retq
792;
793; AVX-LABEL: shuffle_v2f64_z0:
794; AVX:       # %bb.0:
795; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
796; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
797; AVX-NEXT:    retq
798  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
799  ret <2 x double> %shuffle
800}
801
; Mask <2,1> on doubles against zeroinitializer: lane 0 = zero, lane 1 = %a[1].
; All checks materialise a zero register first; below SSE4.1 they merge it with movsd, from SSE4.1
; upward with blendps/vblendps.
802define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
803; SSE2-LABEL: shuffle_v2f64_z1:
804; SSE2:       # %bb.0:
805; SSE2-NEXT:    xorpd %xmm1, %xmm1
806; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
807; SSE2-NEXT:    retq
808;
809; SSE3-LABEL: shuffle_v2f64_z1:
810; SSE3:       # %bb.0:
811; SSE3-NEXT:    xorpd %xmm1, %xmm1
812; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
813; SSE3-NEXT:    retq
814;
815; SSSE3-LABEL: shuffle_v2f64_z1:
816; SSSE3:       # %bb.0:
817; SSSE3-NEXT:    xorpd %xmm1, %xmm1
818; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
819; SSSE3-NEXT:    retq
820;
821; SSE41-LABEL: shuffle_v2f64_z1:
822; SSE41:       # %bb.0:
823; SSE41-NEXT:    xorps %xmm1, %xmm1
824; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
825; SSE41-NEXT:    retq
826;
827; AVX-LABEL: shuffle_v2f64_z1:
828; AVX:       # %bb.0:
829; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
830; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
831; AVX-NEXT:    retq
832  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
833  ret <2 x double> %shuffle
834}
835
; Two shuffles joined by bitcasts: the first (mask <2,1>) builds <zero, %a[1]>, the second swaps the
; 64-bit halves as a <4 x float> shuffle with mask <2,3,0,1>, giving <%a[1], zero> overall.
; The checks expect the pair to collapse into one unpckhpd/vunpckhpd against a zeroed register.
836define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
837; SSE-LABEL: shuffle_v2f64_bitcast_1z:
838; SSE:       # %bb.0:
839; SSE-NEXT:    xorps %xmm1, %xmm1
840; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
841; SSE-NEXT:    retq
842;
843; AVX-LABEL: shuffle_v2f64_bitcast_1z:
844; AVX:       # %bb.0:
845; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
846; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
847; AVX-NEXT:    retq
848  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
849  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
850  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
851  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
852  ret <2 x double> %bitcast64
853}
854
; Views %x as <4 x float>, replaces float lane 0 with 1.0 (mask <4,1,2,3>), then ANDs the result as
; <2 x i64> with <-4294967296, -1>. That constant clears the low 32 bits of i64 lane 0, which is exactly
; the inserted float, so the net effect is "zero float lane 0, keep lanes 1-3".
; The checks expect an and with a constant-pool mask below SSE4.1 and a blend with zero from SSE4.1 upward.
855define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
856; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
857; SSE2:       # %bb.0:
858; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
859; SSE2-NEXT:    retq
860;
861; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
862; SSE3:       # %bb.0:
863; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
864; SSE3-NEXT:    retq
865;
866; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
867; SSSE3:       # %bb.0:
868; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
869; SSSE3-NEXT:    retq
870;
871; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
872; SSE41:       # %bb.0:
873; SSE41-NEXT:    xorps %xmm1, %xmm1
874; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
875; SSE41-NEXT:    retq
876;
877; AVX-LABEL: shuffle_v2i64_bitcast_z123:
878; AVX:       # %bb.0:
879; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
880; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
881; AVX-NEXT:    retq
882  %bitcast32 = bitcast <2 x i64> %x to <4 x float>
883  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
884  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
885  %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
886  ret <2 x i64> %and
887}
888
; Puts the i64 argument %a in lane 0 and takes lane 1 from zeroinitializer
; (mask <0, 3>), giving <%a, 0>. The checks expect a single movq/vmovq from
; %rdi with no separate zeroing instruction.
889define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
890; SSE-LABEL: insert_reg_and_zero_v2i64:
891; SSE:       # %bb.0:
892; SSE-NEXT:    movq %rdi, %xmm0
893; SSE-NEXT:    retq
894;
895; AVX-LABEL: insert_reg_and_zero_v2i64:
896; AVX:       # %bb.0:
897; AVX-NEXT:    vmovq %rdi, %xmm0
898; AVX-NEXT:    retq
899  %v = insertelement <2 x i64> undef, i64 %a, i32 0
900  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
901  ret <2 x i64> %shuffle
902}
903
; Loads an i64 from %ptr into lane 0 and takes lane 1 from zeroinitializer
; (mask <0, 3>), giving <load, 0>. The checks expect the load and the zeroing
; of the upper lane to be covered by one movsd/vmovsd (mem[0],zero).
904define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
905; SSE-LABEL: insert_mem_and_zero_v2i64:
906; SSE:       # %bb.0:
907; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
908; SSE-NEXT:    retq
909;
910; AVX-LABEL: insert_mem_and_zero_v2i64:
911; AVX:       # %bb.0:
912; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
913; AVX-NEXT:    retq
914  %a = load i64, i64* %ptr
915  %v = insertelement <2 x i64> undef, i64 %a, i32 0
916  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
917  ret <2 x i64> %shuffle
918}
919
; Puts the double argument %a in lane 0 and takes lane 1 from zeroinitializer
; (mask <0, 3>), giving <%a, 0.0>. The checks expect one register-to-register
; movq/vmovq that keeps lane 0 and zeroes the upper lane.
920define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
921; SSE-LABEL: insert_reg_and_zero_v2f64:
922; SSE:       # %bb.0:
923; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
924; SSE-NEXT:    retq
925;
926; AVX-LABEL: insert_reg_and_zero_v2f64:
927; AVX:       # %bb.0:
928; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
929; AVX-NEXT:    retq
930  %v = insertelement <2 x double> undef, double %a, i32 0
931  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
932  ret <2 x double> %shuffle
933}
934
; Loads a double from %ptr into lane 0 and takes lane 1 from zeroinitializer
; (mask <0, 3>), giving <load, 0.0>. The checks expect a single movsd/vmovsd
; from memory (mem[0],zero).
935define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
936; SSE-LABEL: insert_mem_and_zero_v2f64:
937; SSE:       # %bb.0:
938; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
939; SSE-NEXT:    retq
940;
941; AVX-LABEL: insert_mem_and_zero_v2f64:
942; AVX:       # %bb.0:
943; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
944; AVX-NEXT:    retq
945  %a = load double, double* %ptr
946  %v = insertelement <2 x double> undef, double %a, i32 0
947  %shuffle = shufflevector <2 x double> %v, <2 x double> %zeroinitializer_placeholder_do_not_use, <2 x i32> <i32 0, i32 3>
948  ret <2 x double> %shuffle
949}
950
; Replaces the low lane of %b with the i64 argument %a: mask <0, 3> selects
; lane 0 of the inserted vector and lane 1 of %b, giving <%a, %b[1]>.
; The SSE2/SSE3/SSSE3 runs expect a movq to a scratch register followed by a
; movsd merge; the SSE4.1 and AVX runs expect a single pinsrq/vpinsrq $0.
951define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
952; SSE2-LABEL: insert_reg_lo_v2i64:
953; SSE2:       # %bb.0:
954; SSE2-NEXT:    movq %rdi, %xmm1
955; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
956; SSE2-NEXT:    retq
957;
958; SSE3-LABEL: insert_reg_lo_v2i64:
959; SSE3:       # %bb.0:
960; SSE3-NEXT:    movq %rdi, %xmm1
961; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
962; SSE3-NEXT:    retq
963;
964; SSSE3-LABEL: insert_reg_lo_v2i64:
965; SSSE3:       # %bb.0:
966; SSSE3-NEXT:    movq %rdi, %xmm1
967; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
968; SSSE3-NEXT:    retq
969;
970; SSE41-LABEL: insert_reg_lo_v2i64:
971; SSE41:       # %bb.0:
972; SSE41-NEXT:    pinsrq $0, %rdi, %xmm0
973; SSE41-NEXT:    retq
974;
975; AVX-LABEL: insert_reg_lo_v2i64:
976; AVX:       # %bb.0:
977; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
978; AVX-NEXT:    retq
979  %v = insertelement <2 x i64> undef, i64 %a, i32 0
980  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
981  ret <2 x i64> %shuffle
982}
983
; Replaces the low lane of %b with an i64 loaded from %ptr: mask <0, 3> gives
; <load, %b[1]>. The SSE2/SSE3/SSSE3 runs expect the load folded into a movlps;
; the SSE4.1 and AVX runs expect pinsrq/vpinsrq $0 with a memory operand.
984define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
985; SSE2-LABEL: insert_mem_lo_v2i64:
986; SSE2:       # %bb.0:
987; SSE2-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
988; SSE2-NEXT:    retq
989;
990; SSE3-LABEL: insert_mem_lo_v2i64:
991; SSE3:       # %bb.0:
992; SSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
993; SSE3-NEXT:    retq
994;
995; SSSE3-LABEL: insert_mem_lo_v2i64:
996; SSSE3:       # %bb.0:
997; SSSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
998; SSSE3-NEXT:    retq
999;
1000; SSE41-LABEL: insert_mem_lo_v2i64:
1001; SSE41:       # %bb.0:
1002; SSE41-NEXT:    pinsrq $0, (%rdi), %xmm0
1003; SSE41-NEXT:    retq
1004;
1005; AVX-LABEL: insert_mem_lo_v2i64:
1006; AVX:       # %bb.0:
1007; AVX-NEXT:    vpinsrq $0, (%rdi), %xmm0, %xmm0
1008; AVX-NEXT:    retq
1009  %a = load i64, i64* %ptr
1010  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1011  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1012  ret <2 x i64> %shuffle
1013}
1014
; Replaces the high lane of %b with the i64 argument %a: mask <2, 0> selects
; lane 0 of %b and lane 0 of the inserted vector, giving <%b[0], %a>.
; The SSE2/SSE3/SSSE3 runs expect movq + punpcklqdq; the SSE4.1 and AVX runs
; expect a single pinsrq/vpinsrq $1.
1015define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1016; SSE2-LABEL: insert_reg_hi_v2i64:
1017; SSE2:       # %bb.0:
1018; SSE2-NEXT:    movq %rdi, %xmm1
1019; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1020; SSE2-NEXT:    retq
1021;
1022; SSE3-LABEL: insert_reg_hi_v2i64:
1023; SSE3:       # %bb.0:
1024; SSE3-NEXT:    movq %rdi, %xmm1
1025; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1026; SSE3-NEXT:    retq
1027;
1028; SSSE3-LABEL: insert_reg_hi_v2i64:
1029; SSSE3:       # %bb.0:
1030; SSSE3-NEXT:    movq %rdi, %xmm1
1031; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1032; SSSE3-NEXT:    retq
1033;
1034; SSE41-LABEL: insert_reg_hi_v2i64:
1035; SSE41:       # %bb.0:
1036; SSE41-NEXT:    pinsrq $1, %rdi, %xmm0
1037; SSE41-NEXT:    retq
1038;
1039; AVX-LABEL: insert_reg_hi_v2i64:
1040; AVX:       # %bb.0:
1041; AVX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
1042; AVX-NEXT:    retq
1043  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1044  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1045  ret <2 x i64> %shuffle
1046}
1047
; Replaces the high lane of %b with an i64 loaded from %ptr: mask <2, 0> gives
; <%b[0], load>. The SSE2/SSE3/SSSE3 runs expect a movsd load into a scratch
; register followed by movlhps; the SSE4.1 and AVX runs expect pinsrq/vpinsrq
; $1 with a memory operand.
1048define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1049; SSE2-LABEL: insert_mem_hi_v2i64:
1050; SSE2:       # %bb.0:
1051; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
1052; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1053; SSE2-NEXT:    retq
1054;
1055; SSE3-LABEL: insert_mem_hi_v2i64:
1056; SSE3:       # %bb.0:
1057; SSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
1058; SSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1059; SSE3-NEXT:    retq
1060;
1061; SSSE3-LABEL: insert_mem_hi_v2i64:
1062; SSSE3:       # %bb.0:
1063; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
1064; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1065; SSSE3-NEXT:    retq
1066;
1067; SSE41-LABEL: insert_mem_hi_v2i64:
1068; SSE41:       # %bb.0:
1069; SSE41-NEXT:    pinsrq $1, (%rdi), %xmm0
1070; SSE41-NEXT:    retq
1071;
1072; AVX-LABEL: insert_mem_hi_v2i64:
1073; AVX:       # %bb.0:
1074; AVX-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm0
1075; AVX-NEXT:    retq
1076  %a = load i64, i64* %ptr
1077  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1078  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1079  ret <2 x i64> %shuffle
1080}
1081
; Replaces the low lane of %b with the double argument %a: mask <0, 3> gives
; <%a, %b[1]>. %a arrives in xmm0 and %b in xmm1, so the merge is done in
; place in xmm0. The SSE2/SSE3/SSSE3 runs expect one shufps; the SSE4.1 and AVX
; runs expect one blendps/vblendps.
1082define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1083; SSE2-LABEL: insert_reg_lo_v2f64:
1084; SSE2:       # %bb.0:
1085; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1086; SSE2-NEXT:    retq
1087;
1088; SSE3-LABEL: insert_reg_lo_v2f64:
1089; SSE3:       # %bb.0:
1090; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1091; SSE3-NEXT:    retq
1092;
1093; SSSE3-LABEL: insert_reg_lo_v2f64:
1094; SSSE3:       # %bb.0:
1095; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1096; SSSE3-NEXT:    retq
1097;
1098; SSE41-LABEL: insert_reg_lo_v2f64:
1099; SSE41:       # %bb.0:
1100; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1101; SSE41-NEXT:    retq
1102;
1103; AVX-LABEL: insert_reg_lo_v2f64:
1104; AVX:       # %bb.0:
1105; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1106; AVX-NEXT:    retq
1107  %v = insertelement <2 x double> undef, double %a, i32 0
1108  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1109  ret <2 x double> %shuffle
1110}
1111
; Replaces the low lane of %b with a double loaded from %ptr: mask <0, 3>
; gives <load, %b[1]>. All runs expect the load folded into one
; movlps/vmovlps that overwrites only the low half of xmm0.
1112define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1113; SSE-LABEL: insert_mem_lo_v2f64:
1114; SSE:       # %bb.0:
1115; SSE-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1116; SSE-NEXT:    retq
1117;
1118; AVX-LABEL: insert_mem_lo_v2f64:
1119; AVX:       # %bb.0:
1120; AVX-NEXT:    vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1121; AVX-NEXT:    retq
1122  %a = load double, double* %ptr
1123  %v = insertelement <2 x double> undef, double %a, i32 0
1124  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1125  ret <2 x double> %shuffle
1126}
1127
; Replaces the high lane of %b with the double argument %a: mask <2, 0> gives
; <%b[0], %a>. %a arrives in xmm0 and %b in xmm1. The SSE runs expect the
; two-operand movlhps to build the result in xmm1 and then copy it to xmm0;
; the AVX runs expect one three-operand vmovlhps writing xmm0 directly.
1128define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1129; SSE-LABEL: insert_reg_hi_v2f64:
1130; SSE:       # %bb.0:
1131; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1132; SSE-NEXT:    movaps %xmm1, %xmm0
1133; SSE-NEXT:    retq
1134;
1135; AVX-LABEL: insert_reg_hi_v2f64:
1136; AVX:       # %bb.0:
1137; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1138; AVX-NEXT:    retq
1139  %v = insertelement <2 x double> undef, double %a, i32 0
1140  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1141  ret <2 x double> %shuffle
1142}
1143
; Replaces the high lane of %b with a double loaded from %ptr: mask <2, 0>
; gives <%b[0], load>. All runs expect the load folded into one
; movhps/vmovhps that overwrites only the high half of xmm0.
1144define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1145; SSE-LABEL: insert_mem_hi_v2f64:
1146; SSE:       # %bb.0:
1147; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1148; SSE-NEXT:    retq
1149;
1150; AVX-LABEL: insert_mem_hi_v2f64:
1151; AVX:       # %bb.0:
1152; AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1153; AVX-NEXT:    retq
1154  %a = load double, double* %ptr
1155  %v = insertelement <2 x double> undef, double %a, i32 0
1156  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1157  ret <2 x double> %shuffle
1158}
1159
; Broadcasts the double argument %a to both lanes: it is inserted into lane 0
; and then splatted with mask <0, 0>. The SSE2 run expects movlhps of xmm0
; with itself; the SSE3-and-later and AVX runs expect movddup/vmovddup.
1160define <2 x double> @insert_dup_reg_v2f64(double %a) {
1161; SSE2-LABEL: insert_dup_reg_v2f64:
1162; SSE2:       # %bb.0:
1163; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1164; SSE2-NEXT:    retq
1165;
1166; SSE3-LABEL: insert_dup_reg_v2f64:
1167; SSE3:       # %bb.0:
1168; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1169; SSE3-NEXT:    retq
1170;
1171; SSSE3-LABEL: insert_dup_reg_v2f64:
1172; SSSE3:       # %bb.0:
1173; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1174; SSSE3-NEXT:    retq
1175;
1176; SSE41-LABEL: insert_dup_reg_v2f64:
1177; SSE41:       # %bb.0:
1178; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1179; SSE41-NEXT:    retq
1180;
1181; AVX-LABEL: insert_dup_reg_v2f64:
1182; AVX:       # %bb.0:
1183; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1184; AVX-NEXT:    retq
1185  %v = insertelement <2 x double> undef, double %a, i32 0
1186  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1187  ret <2 x double> %shuffle
1188}
1189
; Broadcasts a double loaded from %ptr to both lanes (insert into lane 0, then
; splat with mask <0, 0>). The SSE2 run expects a movsd load followed by
; movlhps; the SSE3-and-later and AVX runs expect the load folded into a
; single movddup/vmovddup.
1190define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1191; SSE2-LABEL: insert_dup_mem_v2f64:
1192; SSE2:       # %bb.0:
1193; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1194; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1195; SSE2-NEXT:    retq
1196;
1197; SSE3-LABEL: insert_dup_mem_v2f64:
1198; SSE3:       # %bb.0:
1199; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1200; SSE3-NEXT:    retq
1201;
1202; SSSE3-LABEL: insert_dup_mem_v2f64:
1203; SSSE3:       # %bb.0:
1204; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1205; SSSE3-NEXT:    retq
1206;
1207; SSE41-LABEL: insert_dup_mem_v2f64:
1208; SSE41:       # %bb.0:
1209; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1210; SSE41-NEXT:    retq
1211;
1212; AVX-LABEL: insert_dup_mem_v2f64:
1213; AVX:       # %bb.0:
1214; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1215; AVX-NEXT:    retq
1216  %a = load double, double* %ptr
1217  %v = insertelement <2 x double> undef, double %a, i32 0
1218  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1219  ret <2 x double> %shuffle
1220}
1221
; Loads a full <2 x double> from %ptr and splats its lane 0 (mask <0, 0>).
; The SSE2 run expects a whole-vector movaps load followed by movlhps; the
; SSE3-and-later and AVX runs expect the load folded into a single
; movddup/vmovddup with a memory operand.
1222define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1223; SSE2-LABEL: insert_dup_mem128_v2f64:
1224; SSE2:       # %bb.0:
1225; SSE2-NEXT:    movaps (%rdi), %xmm0
1226; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1227; SSE2-NEXT:    retq
1228;
1229; SSE3-LABEL: insert_dup_mem128_v2f64:
1230; SSE3:       # %bb.0:
1231; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1232; SSE3-NEXT:    retq
1233;
1234; SSSE3-LABEL: insert_dup_mem128_v2f64:
1235; SSSE3:       # %bb.0:
1236; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1237; SSSE3-NEXT:    retq
1238;
1239; SSE41-LABEL: insert_dup_mem128_v2f64:
1240; SSE41:       # %bb.0:
1241; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1242; SSE41-NEXT:    retq
1243;
1244; AVX-LABEL: insert_dup_mem128_v2f64:
1245; AVX:       # %bb.0:
1246; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1247; AVX-NEXT:    retq
1248  %v = load  <2 x double>,  <2 x double>* %ptr
1249  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1250  ret <2 x double> %shuffle
1251}
1252
1253
; Broadcasts an i64 loaded from %ptr (align 1) to both lanes: it is inserted
; into lane 0 and splatted with a zeroinitializer mask. The SSE runs expect a
; movq load followed by pshufd; the AVX runs expect the load folded into a
; single vmovddup.
1254define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1255; SSE-LABEL: insert_dup_mem_v2i64:
1256; SSE:       # %bb.0:
1257; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1258; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1259; SSE-NEXT:    retq
1260;
1261; AVX-LABEL: insert_dup_mem_v2i64:
1262; AVX:       # %bb.0:
1263; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1264; AVX-NEXT:    retq
1265  %tmp = load i64, i64* %ptr, align 1
1266  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1267  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
1268  ret <2 x i64> %tmp2
1269}
1270
; Loads a <2 x double> from %ptr and swaps its two lanes (mask <1, 0>).
; The SSE runs expect a separate movaps load followed by an in-register
; shufps; the AVX runs expect the load folded into a single vpermilpd.
1271define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1272; SSE-LABEL: shuffle_mem_v2f64_10:
1273; SSE:       # %bb.0:
1274; SSE-NEXT:    movaps (%rdi), %xmm0
1275; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
1276; SSE-NEXT:    retq
1277;
1278; AVX-LABEL: shuffle_mem_v2f64_10:
1279; AVX:       # %bb.0:
1280; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
1281; AVX-NEXT:    retq
1282
1283  %a = load <2 x double>, <2 x double>* %ptr
1284  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1285  ret <2 x double> %shuffle
1286}
1287
; Mask <3, 1> takes lane 1 of the loaded vector %c and lane 1 of %a, giving
; <%c[1], %a[1]>. All runs expect the load folded into one movlps/vmovlps
; that replaces only the low half of xmm0.
; NOTE(review): the address operand is swallowed by the {{.*#+}} wildcard, so
; these checks do not pin which 8 bytes of %c are read.
1288define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) {
1289; SSE-LABEL: shuffle_mem_v2f64_31:
1290; SSE:       # %bb.0:
1291; SSE-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1292; SSE-NEXT:    retq
1293;
1294; AVX-LABEL: shuffle_mem_v2f64_31:
1295; AVX:       # %bb.0:
1296; AVX-NEXT:    vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1297; AVX-NEXT:    retq
1298  %c = load <2 x double>, <2 x double>* %b
1299  %f = shufflevector <2 x double> %a, <2 x double> %c, <2 x i32> <i32 3, i32 1>
1300  ret <2 x double> %f
1301}
1302
; Mask <0, 2> keeps lane 0 of %a and takes lane 0 of %b, which is loaded from
; %pb with align 1, giving <%a[0], %b[0]>. The SSE runs expect the load folded
; into movhps; the AVX runs expect vunpcklpd with a memory operand.
1303define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) {
1304; SSE-LABEL: shuffle_mem_v2f64_02:
1305; SSE:       # %bb.0:
1306; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1307; SSE-NEXT:    retq
1308;
1309; AVX-LABEL: shuffle_mem_v2f64_02:
1310; AVX:       # %bb.0:
1311; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1312; AVX-NEXT:    retq
1313  %b = load <2 x double>, <2 x double>* %pb, align 1
1314  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
1315  ret <2 x double> %shuffle
1316}
1317
; Mask <2, 1> takes lane 0 of %b (loaded from %pb with align 1) and lane 1 of
; %a, giving <%b[0], %a[1]>. The SSE2/SSE3/SSSE3 runs expect the load folded
; into movlps; the SSE4.1 run expects an explicit unaligned movups followed by
; blendps; the AVX runs expect vblendps with the memory operand folded in.
1318define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) {
1319; SSE2-LABEL: shuffle_mem_v2f64_21:
1320; SSE2:       # %bb.0:
1321; SSE2-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1322; SSE2-NEXT:    retq
1323;
1324; SSE3-LABEL: shuffle_mem_v2f64_21:
1325; SSE3:       # %bb.0:
1326; SSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1327; SSE3-NEXT:    retq
1328;
1329; SSSE3-LABEL: shuffle_mem_v2f64_21:
1330; SSSE3:       # %bb.0:
1331; SSSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1332; SSSE3-NEXT:    retq
1333;
1334; SSE41-LABEL: shuffle_mem_v2f64_21:
1335; SSE41:       # %bb.0:
1336; SSE41-NEXT:    movups (%rdi), %xmm1
1337; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1338; SSE41-NEXT:    retq
1339;
1340; AVX-LABEL: shuffle_mem_v2f64_21:
1341; AVX:       # %bb.0:
1342; AVX-NEXT:    vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1343; AVX-NEXT:    retq
1344  %b = load <2 x double>, <2 x double>* %pb, align 1
1345  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
1346  ret <2 x double> %shuffle
1347}
1348