; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX

; Concatenate the <2 x double> values at vector indices 2 and 3 of %ptr into one
; <4 x double>. The two 16-byte loads are adjacent, so a single unaligned
; 32-byte load from byte offset 32 is expected.
define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_23:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 32(%rdi), %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_2f64_23:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovups 32(%eax), %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %res = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %res
}

; Low half is the <2 x double> at vector index 2 of %ptr, high half is zero.
; Expected code is a 16-byte load from byte offset 32 followed by inserting an
; explicitly zeroed xmm register as the upper lane.
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_2z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps 32(%rdi), %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_2f64_2z:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovaps 32(%eax), %xmm0
; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %res = shufflevector <2 x double> %val0, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %res
}

; Four scalar double loads from indices 2, 3, 4 and 5 of %ptr are inserted into
; lanes 0..3. They are consecutive in memory, so a single unaligned 32-byte load
; from byte offset 16 is expected.
define <4 x double> @merge_4f64_f64_2345(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_2345:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 16(%rdi), %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_f64_2345:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovups 16(%eax), %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr2 = getelementptr inbounds double, double* %ptr, i64 4
  %ptr3 = getelementptr inbounds double, double* %ptr, i64 5
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val2 = load double, double* %ptr2
  %val3 = load double, double* %ptr3
  %res0 = insertelement <4 x double> undef, double %val0, i32 0
  %res1 = insertelement <4 x double> %res0, double %val1, i32 1
  %res2 = insertelement <4 x double> %res1, double %val2, i32 2
  %res3 = insertelement <4 x double> %res2, double %val3, i32 3
  ret <4 x double> %res3
}

; Lane 0 is the double at index 3 of %ptr, lane 1 is 0.0 and lanes 2-3 are left
; undef. A single vmovsd (xmm0 = mem[0],zero) is expected.
define <4 x double> @merge_4f64_f64_3zuu(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_3zuu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_f64_3zuu:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
  %val0 = load double, double* %ptr0
  %res0 = insertelement <4 x double> undef, double %val0, i32 0
  %res1 = insertelement <4 x double> %res0, double 0.0, i32 1
  ret <4 x double> %res1
}

; Lanes 0-1 are the doubles at indices 3 and 4 of %ptr; lanes 2-3 are left
; undef. A single unaligned 16-byte load from byte offset 24 is expected.
define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_34uu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 24(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_f64_34uu:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovups 24(%eax), %xmm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 4
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <4 x double> undef, double %val0, i32 0
  %res1 = insertelement <4 x double> %res0, double %val1, i32 1
  ret <4 x double> %res1
}

; Starting from an all-zero vector, lanes 0-1 are overwritten with the doubles at
; indices 4 and 5 of %ptr; lanes 2-3 stay zero. Expected code is one unaligned
; 16-byte load from byte offset 32 plus a zeroed xmm inserted as the upper lane.
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_45zz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 32(%rdi), %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_f64_45zz:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovups 32(%eax), %xmm0
; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 4
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 5
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <4 x double> zeroinitializer, double %val0, i32 0
  %res1 = insertelement <4 x double> %res0, double %val1, i32 1
  ret <4 x double> %res1
}

; Lanes 0, 1 and 3 are the doubles at indices 3, 4 and 6 of %ptr; lane 2 is 0.0.
; Expected code zeroes ymm0 and blends it with a memory operand (vblendpd) so
; that only lane 2 comes from the zero register.
define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_34z6:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_f64_34z6:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; X32-AVX-NEXT:    vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 4
  %ptr3 = getelementptr inbounds double, double* %ptr, i64 6
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val3 = load double, double* %ptr3
  %res0 = insertelement <4 x double> undef, double %val0, i32 0
  %res1 = insertelement <4 x double> %res0, double %val1, i32 1
  %res2 = insertelement <4 x double> %res1, double   0.0, i32 2
  %res3 = insertelement <4 x double> %res2, double %val3, i32 3
  ret <4 x double> %res3
}

; Low half is the <2 x i64> at vector index 3 of %ptr (byte offset 48), high
; half is zero. Expected code is a 16-byte load followed by inserting an
; explicitly zeroed xmm register as the upper lane.
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_2i64_3z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps 48(%rdi), %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4i64_2i64_3z:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovaps 48(%eax), %xmm0
; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
  %val0 = load <2 x i64>, <2 x i64>* %ptr0
  %res = shufflevector <2 x i64> %val0, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %res
}

; Four scalar i64 loads from indices 1, 2, 3 and 4 of %ptr are inserted into
; lanes 0..3. They are consecutive in memory, so a single unaligned 32-byte load
; from byte offset 8 is expected.
define <4 x i64> @merge_4i64_i64_1234(i64* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_i64_1234:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 8(%rdi), %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4i64_i64_1234:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovups 8(%eax), %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 2
  %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
  %ptr3 = getelementptr inbounds i64, i64* %ptr, i64 4
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %val2 = load i64, i64* %ptr2
  %val3 = load i64, i64* %ptr3
  %res0 = insertelement <4 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <4 x i64> %res0, i64 %val1, i32 1
  %res2 = insertelement <4 x i64> %res1, i64 %val2, i32 2
  %res3 = insertelement <4 x i64> %res2, i64 %val3, i32 3
  ret <4 x i64> %res3
}

; Lane 0 is the i64 at index 1 of %ptr, lanes 1 and 2 are zero and lane 3 is
; left undef. A single vmovq (xmm0 = mem[0],zero) is expected.
define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_i64_1zzu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4i64_i64_1zzu:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %val0 = load i64, i64* %ptr0
  %res0 = insertelement <4 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <4 x i64> %res0, i64 0, i32 1
  %res2 = insertelement <4 x i64> %res1, i64 0, i32 2
  ret <4 x i64> %res2
}

; Starting from an all-zero vector, lanes 0-1 are overwritten with the i64
; values at indices 2 and 3 of %ptr; lanes 2-3 stay zero. Expected code is one
; unaligned 16-byte load from byte offset 16 plus a zeroed upper lane.
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_i64_23zz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 16(%rdi), %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_4i64_i64_23zz:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovups 16(%eax), %xmm0
; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %res0 = insertelement <4 x i64> zeroinitializer, i64 %val0, i32 0
  %res1 = insertelement <4 x i64> %res0, i64 %val1, i32 1
  ret <4 x i64> %res1
}

; Lanes 0-3 come from the <2 x float> values at vector indices 2 and 3 of %ptr,
; lanes 4-5 are zero and lanes 6-7 come from vector index 5.
; The 64-bit runs expect a 16-byte load for the low half and a zeroed xmm1 whose
; high 64 bits are loaded with vmovhpd, joined by vinsertf128; they are checked
; per feature level because the AVX512F run selects vmovups/vxorps where the
; other two select vmovupd/vxorpd. The 32-bit run expects a zeroed ymm0 blended
; with a memory operand instead.
define <8 x float> @merge_8f32_2f32_23z5(<2 x float>* %ptr) nounwind uwtable noinline ssp {
; AVX1-LABEL: merge_8f32_2f32_23z5:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovupd 16(%rdi), %xmm0
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: merge_8f32_2f32_23z5:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovupd 16(%rdi), %xmm0
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: merge_8f32_2f32_23z5:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovups 16(%rdi), %xmm0
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; X32-AVX-LABEL: merge_8f32_2f32_23z5:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; X32-AVX-NEXT:    vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 2
  %ptr1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3
  %ptr3 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 5
  %val0 = load <2 x float>, <2 x float>* %ptr0
  %val1 = load <2 x float>, <2 x float>* %ptr1
  %val3 = load <2 x float>, <2 x float>* %ptr3
  %res01 = shufflevector <2 x float> %val0, <2 x float> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x float> zeroinitializer, <2 x float> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x float> %res01, <4 x float> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}

; Low half is zero, high half is the <4 x float> at vector index 2 of %ptr
; (byte offset 32). Expected code zeroes xmm0 and inserts the upper lane
; directly from memory with vinsertf128.
define <8 x float> @merge_8f32_4f32_z2(<4 x float>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_8f32_4f32_z2:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, 32(%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_8f32_4f32_z2:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT:    vinsertf128 $1, 32(%eax), %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2
  %val1 = load <4 x float>, <4 x float>* %ptr1
  %res = shufflevector <4 x float> zeroinitializer, <4 x float> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}

; Lanes 0-1 are the floats at indices 1 and 2 of %ptr, lanes 2, 3, 6 and 7 are
; 0.0 and lanes 4-5 are left undef. A single vmovsd (xmm0 = mem[0],zero) is
; expected.
define <8 x float> @merge_8f32_f32_12zzuuzz(float* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_8f32_f32_12zzuuzz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_8f32_f32_12zzuuzz:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 1
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 2
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <8 x float> undef, float %val0, i32 0
  %res1 = insertelement <8 x float> %res0, float %val1, i32 1
  %res2 = insertelement <8 x float> %res1, float   0.0, i32 2
  %res3 = insertelement <8 x float> %res2, float   0.0, i32 3
  %res6 = insertelement <8 x float> %res3, float   0.0, i32 6
  %res7 = insertelement <8 x float> %res6, float   0.0, i32 7
  ret <8 x float> %res7
}

; Lanes 0, 2, 4 and 7 are the floats at indices 1, 3, 5 and 8 of %ptr, lane 5 is
; 0.0 and lanes 1, 3 and 6 are left undef. Expected code zeroes ymm0 and blends
; it with a memory operand (vblendps) so that only lane 5 comes from the zero
; register.
define <8 x float> @merge_8f32_f32_1u3u5zu8(float* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_8f32_f32_1u3u5zu8:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_8f32_f32_1u3u5zu8:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; X32-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 1
  %ptr2 = getelementptr inbounds float, float* %ptr, i64 3
  %ptr4 = getelementptr inbounds float, float* %ptr, i64 5
  %ptr7 = getelementptr inbounds float, float* %ptr, i64 8
  %val0 = load float, float* %ptr0
  %val2 = load float, float* %ptr2
  %val4 = load float, float* %ptr4
  %val7 = load float, float* %ptr7
  %res0 = insertelement <8 x float> undef, float %val0, i32 0
  %res2 = insertelement <8 x float> %res0, float %val2, i32 2
  %res4 = insertelement <8 x float> %res2, float %val4, i32 4
  %res5 = insertelement <8 x float> %res4, float   0.0, i32 5
  %res7 = insertelement <8 x float> %res5, float %val7, i32 7
  ret <8 x float> %res7
}

; Low half is zero, high half is the <4 x i32> at vector index 3 of %ptr (byte
; offset 48). Expected code zeroes xmm0 and inserts the upper lane directly from
; memory with vinsertf128.
define <8 x i32> @merge_8i32_4i32_z3(<4 x i32>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_8i32_4i32_z3:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, 48(%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_8i32_4i32_z3:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT:    vinsertf128 $1, 48(%eax), %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
  %val1 = load <4 x i32>, <4 x i32>* %ptr1
  %res = shufflevector <4 x i32> zeroinitializer, <4 x i32> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %res
}

; Lanes 0-1 are the i32 values at indices 5 and 6 of %ptr, lane 4 is the value
; at index 9, lanes 2, 3, 6 and 7 are zero and lane 5 is left undef.
; Expected code loads the low half with vmovq and the high half with vmovd and
; joins them with a 128-bit insert; the insert is vinsertf128 for the AVX1 and
; 32-bit runs and vinserti128 for the AVX2 and AVX512F runs.
define <8 x i32> @merge_8i32_i32_56zz9uzz(i32* %ptr) nounwind uwtable noinline ssp {
; AVX1-LABEL: merge_8i32_i32_56zz9uzz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: merge_8i32_i32_56zz9uzz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: merge_8i32_i32_56zz9uzz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; X32-AVX-LABEL: merge_8i32_i32_56zz9uzz:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 5
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 6
  %ptr4 = getelementptr inbounds i32, i32* %ptr, i64 9
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %val4 = load i32, i32* %ptr4
  %res0 = insertelement <8 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <8 x i32> %res0, i32 %val1, i32 1
  %res2 = insertelement <8 x i32> %res1, i32     0, i32 2
  %res3 = insertelement <8 x i32> %res2, i32     0, i32 3
  %res4 = insertelement <8 x i32> %res3, i32 %val4, i32 4
  %res6 = insertelement <8 x i32> %res4, i32     0, i32 6
  %res7 = insertelement <8 x i32> %res6, i32     0, i32 7
  ret <8 x i32> %res7
}

; Lanes 0, 2, 4 and 7 are the i32 values at indices 1, 3, 5 and 8 of %ptr, lane
; 5 is zero and lanes 1, 3 and 6 are left undef. Expected code zeroes ymm0 and
; blends it with a memory operand so that only lane 5 comes from the zero
; register; the AVX1 and 32-bit runs use vxorps/vblendps while the AVX2 and
; AVX512F runs use vpxor/vpblendd.
define <8 x i32> @merge_8i32_i32_1u3u5zu8(i32* %ptr) nounwind uwtable noinline ssp {
; AVX1-LABEL: merge_8i32_i32_1u3u5zu8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: merge_8i32_i32_1u3u5zu8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: merge_8i32_i32_1u3u5zu8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX512F-NEXT:    retq
;
; X32-AVX-LABEL: merge_8i32_i32_1u3u5zu8:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; X32-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
  %ptr2 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptr4 = getelementptr inbounds i32, i32* %ptr, i64 5
  %ptr7 = getelementptr inbounds i32, i32* %ptr, i64 8
  %val0 = load i32, i32* %ptr0
  %val2 = load i32, i32* %ptr2
  %val4 = load i32, i32* %ptr4
  %val7 = load i32, i32* %ptr7
  %res0 = insertelement <8 x i32> undef, i32 %val0, i32 0
  %res2 = insertelement <8 x i32> %res0, i32 %val2, i32 2
  %res4 = insertelement <8 x i32> %res2, i32 %val4, i32 4
  %res5 = insertelement <8 x i32> %res4, i32     0, i32 5
  %res7 = insertelement <8 x i32> %res5, i32 %val7, i32 7
  ret <8 x i32> %res7
}

; Lanes 0-1 are the i16 values at indices 8 and 9 of %ptr, lanes 2, 3, 4 and 15
; are zero and every other lane is left undef. A single vmovd
; (xmm0 = mem[0],zero,zero,zero) is expected.
define <16 x i16> @merge_16i16_i16_89zzzuuuuuuuuuuuz(i16* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 8
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 9
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <16 x i16> %res0, i16 %val1, i16 1
  %res2 = insertelement <16 x i16> %res1, i16     0, i16 2
  %res3 = insertelement <16 x i16> %res2, i16     0, i16 3
  %res4 = insertelement <16 x i16> %res3, i16     0, i16 4
  %resF = insertelement <16 x i16> %res4, i16     0, i16 15
  ret <16 x i16> %resF
}

; Lanes 0, 1 and 3 are the i16 values at indices 4, 5 and 7 of %ptr; every other
; lane (including lane 2) is left undef. A single vmovq (xmm0 = mem[0],zero)
; is expected.
define <16 x i16> @merge_16i16_i16_45u7uuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_16i16_i16_45u7uuuuuuuuuuuu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_16i16_i16_45u7uuuuuuuuuuuu:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <16 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <16 x i16> %res1, i16 %val3, i16 3
  ret <16 x i16> %res3
}

; Lanes 0, 3, 12, 14 and 15 are the i16 values at the same indices of %ptr;
; every other lane is left undef. A single unaligned 32-byte load from %ptr is
; expected.
define <16 x i16> @merge_16i16_i16_0uu3uuuuuuuuCuEF(i16* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_16i16_i16_0uu3uuuuuuuuCuEF:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_16i16_i16_0uu3uuuuuuuuCuEF:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovups (%eax), %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 3
  %ptrC = getelementptr inbounds i16, i16* %ptr, i64 12
  %ptrE = getelementptr inbounds i16, i16* %ptr, i64 14
  %ptrF = getelementptr inbounds i16, i16* %ptr, i64 15
  %val0 = load i16, i16* %ptr0
  %val3 = load i16, i16* %ptr3
  %valC = load i16, i16* %ptrC
  %valE = load i16, i16* %ptrE
  %valF = load i16, i16* %ptrF
  %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
  %res3 = insertelement <16 x i16> %res0, i16 %val3, i16 3
  %resC = insertelement <16 x i16> %res3, i16 %valC, i16 12
  %resE = insertelement <16 x i16> %resC, i16 %valE, i16 14
  %resF = insertelement <16 x i16> %resE, i16 %valF, i16 15
  ret <16 x i16> %resF
}

; Lanes 0, 3, 12, 14 and 15 are the i16 values at the same indices of %ptr,
; lanes 4, 5 and 13 are zero and every other lane is left undef. Expected code
; is a 32-byte access to %ptr combined with vandps against a constant mask that
; keeps only lanes 0, 3, 12, 14 and 15.
; NOTE(review): the function name spells the third zero at lane 11, but the IR
; below writes it to lane 13 - confirm which was intended before changing either.
define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF(i16* %ptr) nounwind uwtable noinline ssp {
; AVX1-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [65535,0,0,65535,0,0,0,0,0,0,0,0,65535,0,65535,65535]
; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovups (%rdi), %ymm0
; AVX2-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovups (%rdi), %ymm0
; AVX512F-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; X32-AVX-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65535,0,0,65535,0,0,0,0,0,0,0,0,65535,0,65535,65535]
; X32-AVX-NEXT:    vandps (%eax), %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 3
  %ptrC = getelementptr inbounds i16, i16* %ptr, i64 12
  %ptrE = getelementptr inbounds i16, i16* %ptr, i64 14
  %ptrF = getelementptr inbounds i16, i16* %ptr, i64 15
  %val0 = load i16, i16* %ptr0
  %val3 = load i16, i16* %ptr3
  %valC = load i16, i16* %ptrC
  %valE = load i16, i16* %ptrE
  %valF = load i16, i16* %ptrF
  %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
  %res3 = insertelement <16 x i16> %res0, i16 %val3, i16 3
  %res4 = insertelement <16 x i16> %res3, i16     0, i16 4
  %res5 = insertelement <16 x i16> %res4, i16     0, i16 5
  %resC = insertelement <16 x i16> %res5, i16 %valC, i16 12
  %resD = insertelement <16 x i16> %resC, i16     0, i16 13
  %resE = insertelement <16 x i16> %resD, i16 %valE, i16 14
  %resF = insertelement <16 x i16> %resE, i16 %valF, i16 15
  ret <16 x i16> %resF
}

; Lanes 0, 1 and 3 are the i8 values at indices 4, 5 and 7 of %ptr; every other
; lane (including lane 2) is left undef. A single vmovd
; (xmm0 = mem[0],zero,zero,zero) is expected.
define <32 x i8> @merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i8* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 4
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 5
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 7
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %res0 = insertelement <32 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <32 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <32 x i8> %res1, i8 %val3, i8 3
  ret <32 x i8> %res3
}

; Lanes 0, 1 and 3 are the i8 values at indices 2, 3 and 5 of %ptr, lanes 14-17
; are zero and every other lane is left undef. A single vmovd
; (xmm0 = mem[0],zero,zero,zero) is expected.
define <32 x i8> @merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu(i8* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X32-AVX-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 3
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 5
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %res0 = insertelement <32 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <32 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <32 x i8> %res1, i8 %val3, i8 3
  %resE = insertelement <32 x i8> %res3, i8     0, i8 14
  %resF = insertelement <32 x i8> %resE, i8     0, i8 15
  %resG = insertelement <32 x i8> %resF, i8     0, i8 16
  %resH = insertelement <32 x i8> %resG, i8     0, i8 17
  ret <32 x i8> %resH
}

;
; Consecutive loads that include any volatile load must not be combined.
;

; Lanes 0-1 are the doubles at indices 3 and 4 of %ptr, lane 2 is left undef and
; lane 3 is 0.0. Both loads are volatile, so the expected code keeps them as two
; separate 8-byte loads (vmovsd then vmovhpd) and inserts a zeroed xmm register
; as the upper lane.
define <4 x double> @merge_4f64_f64_34uz_volatile(double* %ptr) nounwind uwtable noinline ssp {
; AVX1-LABEL: merge_4f64_f64_34uz_volatile:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: merge_4f64_f64_34uz_volatile:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: merge_4f64_f64_34uz_volatile:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; X32-AVX-LABEL: merge_4f64_f64_34uz_volatile:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 4
  %val0 = load volatile double, double* %ptr0
  %val1 = load volatile double, double* %ptr1
  %res0 = insertelement <4 x double> undef, double %val0, i32 0
  %res1 = insertelement <4 x double> %res0, double %val1, i32 1
  %res3 = insertelement <4 x double> %res1, double   0.0, i32 3
  ret <4 x double> %res3
}

; Same lane layout as the non-volatile 0uu3zzuuuuuzCuEF test (lanes 0, 3, 12, 14
; and 15 loaded from the matching indices of %ptr; lanes 4, 5 and 13 zero), but
; the loads of elements 0 and 15 are volatile. The expected code therefore does
; not use one wide load; each element is inserted individually with vpinsrw into
; a zeroed register and the two halves are joined with a 128-bit insert.
; NOTE(review): the function name spells the third zero at lane 11, but the IR
; below writes it to lane 13 - confirm which was intended before changing either.
define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind uwtable noinline ssp {
; AVX1-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
; AVX1-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
; AVX2-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX2-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
; AVX2-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
; AVX2-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; X32-AVX-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT:    vpinsrw $0, (%eax), %xmm0, %xmm1
; X32-AVX-NEXT:    vpinsrw $3, 6(%eax), %xmm1, %xmm1
; X32-AVX-NEXT:    vpinsrw $4, 24(%eax), %xmm0, %xmm0
; X32-AVX-NEXT:    vpinsrw $6, 28(%eax), %xmm0, %xmm0
; X32-AVX-NEXT:    vpinsrw $7, 30(%eax), %xmm0, %xmm0
; X32-AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 3
  %ptrC = getelementptr inbounds i16, i16* %ptr, i64 12
  %ptrE = getelementptr inbounds i16, i16* %ptr, i64 14
  %ptrF = getelementptr inbounds i16, i16* %ptr, i64 15
  %val0 = load volatile i16, i16* %ptr0
  %val3 = load i16, i16* %ptr3
  %valC = load i16, i16* %ptrC
  %valE = load i16, i16* %ptrE
  %valF = load volatile i16, i16* %ptrF
  %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
  %res3 = insertelement <16 x i16> %res0, i16 %val3, i16 3
  %res4 = insertelement <16 x i16> %res3, i16     0, i16 4
  %res5 = insertelement <16 x i16> %res4, i16     0, i16 5
  %resC = insertelement <16 x i16> %res5, i16 %valC, i16 12
  %resD = insertelement <16 x i16> %resC, i16     0, i16 13
  %resE = insertelement <16 x i16> %resD, i16 %valE, i16 14
  %resF = insertelement <16 x i16> %resE, i16 %valF, i16 15
  ret <16 x i16> %resF
}
