; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F

; Builds an <8 x double> from the <2 x double> chunks at indices 1, 2 and 4 of
; %ptr; result lanes 4-5 come from an undef operand. The checks expect the two
; adjacent chunks to be fetched with one 32-byte load at byte offset 16 and the
; last chunk to be inserted straight from memory at byte offset 64.
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %ymm0
; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %ymm0
; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Builds an <8 x double> from the <2 x double> chunks at indices 2, 3 and 5 of
; %ptr, with result lanes 4-5 taken from a zeroinitializer operand. The checks
; expect one 32-byte load at byte offset 32, an explicit zeroing of the
; register that receives the upper half, and an insert of the last chunk from
; byte offset 80.
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 32(%rdi), %ymm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 32(%eax), %ymm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Concatenates a zero <4 x double> (low half) with the <4 x double> at index 2
; of %ptr (high half). The checks expect a register zeroing followed by a
; vinsertf64x4 that takes its upper half directly from byte offset 64.
define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_4f64_z2:
; ALL:       # BB#0:
; ALL-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
  %val1 = load <4 x double>, <4 x double>* %ptr1
  %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Inserts the doubles at indices 2, 3 and 9 of %ptr into lanes 0, 1 and 7 of an
; otherwise undef <8 x double>. The checks expect the whole vector to be
; produced by a single 64-byte unaligned load at byte offset 16.
define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}

; Inserts the doubles at indices 1 and 2 of %ptr into lanes 0 and 1, writes 0.0
; into lanes 2, 3, 6 and 7, and leaves lanes 4 and 5 undef. The checks expect a
; 16-byte load at byte offset 8 that is then widened with zeroed registers.
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %xmm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %xmm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res2 = insertelement <8 x double> %res1, double   0.0, i32 2
  %res3 = insertelement <8 x double> %res2, double   0.0, i32 3
  %res6 = insertelement <8 x double> %res3, double   0.0, i32 6
  %res7 = insertelement <8 x double> %res6, double   0.0, i32 7
  ret <8 x double> %res7
}

; Inserts the doubles at indices 1, 3, 5 and 8 of %ptr into lanes 0, 2, 4 and
; 7, writes 0.0 into lane 5, and leaves lanes 1, 3 and 6 undef. The checks
; expect a full 64-byte load at byte offset 8 followed by a two-source permute
; against a zeroed register; mask entry 13 picks lane 5 from that zero source.
; The 32-bit run prints the permute constant with twice as many entries
; (presumably the same 64-bit indices shown as 32-bit halves).
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr4 = getelementptr inbounds double, double* %ptr, i64 5
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 8
  %val0 = load double, double* %ptr0
  %val2 = load double, double* %ptr2
  %val4 = load double, double* %ptr4
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res2 = insertelement <8 x double> %res0, double %val2, i32 2
  %res4 = insertelement <8 x double> %res2, double %val4, i32 4
  %res5 = insertelement <8 x double> %res4, double   0.0, i32 5
  %res7 = insertelement <8 x double> %res5, double %val7, i32 7
  ret <8 x double> %res7
}

; Concatenates a zero <4 x i64> (low half) with the <4 x i64> at index 3 of
; %ptr (high half). The checks expect a register zeroing followed by a
; vinserti64x4 that takes its upper half directly from byte offset 96.
define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_4i64_z3:
; ALL:       # BB#0:
; ALL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinserti64x4 $1, 96(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
  %val1 = load <4 x i64>, <4 x i64>* %ptr1
  %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %res
}

; Inserts the i64 values at indices 5, 6 and 9 of %ptr into lanes 0, 1 and 4,
; writes 0 into lanes 2, 3, 6 and 7, and leaves lane 5 undef. The checks expect
; a 16-byte load at byte offset 40 for the low pair and a zero-extending vmovq
; for the lane-4 element, combined with insert instructions.
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu 40(%rdi), %xmm0
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu 40(%eax), %xmm0
; X32-AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 9
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %val4 = load i64, i64* %ptr4
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
  %res2 = insertelement <8 x i64> %res1, i64     0, i32 2
  %res3 = insertelement <8 x i64> %res2, i64     0, i32 3
  %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
  %res6 = insertelement <8 x i64> %res4, i64     0, i32 6
  %res7 = insertelement <8 x i64> %res6, i64     0, i32 7
  ret <8 x i64> %res7
}

; Inserts the i64 values at indices 1, 3, 5 and 8 of %ptr into lanes 0, 2, 4
; and 7, writes 0 into lane 5, and leaves lanes 1, 3 and 6 undef. The checks
; expect a full 64-byte load at byte offset 8 followed by a two-source permute
; against a zeroed register; mask entry 13 picks lane 5 from that zero source.
; The 32-bit run prints the permute constant with twice as many entries
; (presumably the same 64-bit indices shown as 32-bit halves).
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu64 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
  %val0 = load i64, i64* %ptr0
  %val2 = load i64, i64* %ptr2
  %val4 = load i64, i64* %ptr4
  %val7 = load i64, i64* %ptr7
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
  %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
  %res5 = insertelement <8 x i64> %res4, i64     0, i32 5
  %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
  ret <8 x i64> %res7
}

; Inserts the floats at indices 8 and 9 of %ptr into lanes 0 and 1 and writes
; 0.0 into lanes 2, 3, 4 and 15; the remaining lanes are undef. The checks
; expect the whole result to come from one vmovsd, i.e. an 8-byte load that
; zeroes the rest of the xmm register.
define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 8
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 9
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res2 = insertelement <16 x float> %res1, float   0.0, i32 2
  %res3 = insertelement <16 x float> %res2, float   0.0, i32 3
  %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
  %resF = insertelement <16 x float> %res4, float   0.0, i32 15
  ret <16 x float> %resF
}

; Inserts the floats at indices 4, 5 and 7 of %ptr into lanes 0, 1 and 3 of an
; otherwise undef <16 x float>. The checks expect a single 16-byte unaligned
; load at byte offset 16.
define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups 16(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 16(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 7
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res3 = insertelement <16 x float> %res1, float %val3, i32 3
  ret <16 x float> %res3
}

; Inserts the floats at indices 0, 3, 12, 14 and 15 of %ptr into the lanes with
; the same numbers; every other lane is undef. The checks expect a single
; 64-byte unaligned load from %ptr.
define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %resC = insertelement <16 x float> %res3, float %valC, i32 12
  %resE = insertelement <16 x float> %resC, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; Inserts the floats at indices 0, 3, 12, 14 and 15 of %ptr into the lanes with
; the same numbers and writes 0.0 into lanes 4, 5 and 13; every other lane is
; undef. The checks expect a full 64-byte load followed by a two-source permute
; against a zeroed register; mask entries 20, 21 and 29 pick lanes 4, 5 and 13
; from that zero source.
; NOTE(review) - the function name spells a 'z' at lane 11 and a 'u' at lane
; 13, but the IR zeroes lane 13 and leaves lane 11 undef. The name is kept
; because the check labels depend on it.
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
  %res5 = insertelement <16 x float> %res4, float   0.0, i32 5
  %resC = insertelement <16 x float> %res5, float %valC, i32 12
  %resD = insertelement <16 x float> %resC, float   0.0, i32 13
  %resE = insertelement <16 x float> %resD, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; Inserts the i32 values at indices 1 and 2 of %ptr into lanes 0 and 1 and
; writes 0 into lanes 2, 3, 4 and 15; the remaining lanes are undef. The checks
; expect the whole result to come from one vmovq, i.e. an 8-byte load that
; zeroes the rest of the xmm register.
define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 2
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res2 = insertelement <16 x i32> %res1, i32     0, i32 2
  %res3 = insertelement <16 x i32> %res2, i32     0, i32 3
  %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
  %resF = insertelement <16 x i32> %res4, i32     0, i32 15
  ret <16 x i32> %resF
}

; Inserts the i32 values at indices 2, 3 and 5 of %ptr into lanes 0, 1 and 3 of
; an otherwise undef <16 x i32>. The checks expect a single 16-byte unaligned
; load at byte offset 8 (emitted as vmovups even though the data is integer).
define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups 8(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %val3 = load i32, i32* %ptr3
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
  ret <16 x i32> %res3
}

; Inserts the i32 values at indices 0, 3, 12, 14 and 15 of %ptr into the lanes
; with the same numbers; every other lane is undef. The checks expect a single
; 64-byte unaligned load from %ptr.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}

; Inserts the i32 values at indices 0, 3, 12, 14 and 15 of %ptr into the lanes
; with the same numbers and writes 0 into lanes 4, 5 and 13; every other lane
; is undef. The checks expect a full 64-byte load followed by a two-source
; permute against a zeroed register; mask entries 20, 21 and 29 pick lanes 4, 5
; and 13 from that zero source.
; NOTE(review) - the function name spells a 'z' at lane 11 and a 'u' at lane
; 13, but the IR zeroes lane 13 and leaves lane 11 undef. The name is kept
; because the check labels depend on it.
define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
  %res5 = insertelement <16 x i32> %res4, i32     0, i32 5
  %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
  %resD = insertelement <16 x i32> %resC, i32     0, i32 13
  %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}

; Inserts the i16 values at indices 1, 2 and 4 of %ptr into lanes 0, 1 and 3
; and writes 0 into lanes 30 and 31; every other lane is undef. The checks
; expect one 8-byte vmovq load. The runs without AVX512BW additionally zero
; %ymm1 (presumably because <32 x i16> is returned in two ymm registers there,
; and the zeroed lanes 30-31 live in the second one).
define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 4
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
  %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
  ret <32 x i16> %res31
}

; Inserts the i16 values at indices 4, 5 and 7 of %ptr into lanes 0, 1 and 3 of
; an otherwise undef <32 x i16>. The checks expect one 8-byte vmovq load on
; every run, with no extra zeroing.
define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  ret <32 x i16> %res3
}

; Inserts the i16 values at indices 2 and 3 of %ptr into lanes 0 and 1 and
; writes 0 into lanes 3, 14, 15, 16 and 17; every other lane is undef. The
; checks expect one 4-byte vmovd load. The runs without AVX512BW additionally
; zero %ymm1 (presumably because <32 x i16> is returned in two ymm registers
; there, and zeroed lanes 16-17 live in the second one).
define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16     0, i16 3
  %resE = insertelement <32 x i16> %res3, i16     0, i16 14
  %resF = insertelement <32 x i16> %resE, i16     0, i16 15
  %resG = insertelement <32 x i16> %resF, i16     0, i16 16
  %resH = insertelement <32 x i16> %resG, i16     0, i16 17
  ret <32 x i16> %resH
}

; Inserts the i8 values at indices 1, 2, 4 and 8 of %ptr into lanes 0, 1, 3 and
; 7 and writes 0 into lanes 14, 15, 16, 17 and 63; every other lane is undef.
; The checks expect one 8-byte vmovq load. The runs without AVX512BW
; additionally zero %ymm1 (presumably because <64 x i8> is returned in two ymm
; registers there).
define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 8
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %val7 = load i8, i8* %ptr7
  %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0
  %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
  %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3
  %res7  = insertelement <64 x i8> %res3,  i8 %val7, i8 7
  %res14 = insertelement <64 x i8> %res7,  i8     0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8     0, i8 63
  ret <64 x i8> %res63
}

; Inserts the i8 values at indices 1, 2 and 4 of %ptr into lanes 0, 1 and 3 and
; writes 0 into lanes 14, 15, 16, 17 and 63; every other lane is undef. The
; checks expect one 4-byte vmovd load. The runs without AVX512BW additionally
; zero %ymm1 (presumably because <64 x i8> is returned in two ymm registers
; there).
define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0
  %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
  %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3
  %res14 = insertelement <64 x i8> %res3,  i8     0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8     0, i8 63
  ret <64 x i8> %res63
}

;
; consecutive loads including any/all volatiles may not be combined
;

; Same lane layout as the non-volatile 23uuuuu9 pattern (doubles at indices 2,
; 3 and 9 of %ptr go to lanes 0, 1 and 7), but the first load is volatile. The
; checks expect the loads to stay separate - two scalar loads for the low pair
; and a broadcast from byte offset 72 - instead of one wide vector load.
define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vbroadcastsd 72(%rdi), %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32-AVX512F-NEXT:    vbroadcastsd 72(%eax), %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load volatile double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}

; Same lane layout as the non-volatile 0uu3uuuuuuuuCuEF pattern (i32 values at
; indices 0, 3, 12, 14 and 15 of %ptr go to the lanes with the same numbers),
; but every load is volatile. The checks expect five separate element loads
; (vmovd / vpinsrd) assembled with insert instructions instead of one wide
; vector load.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; ALL:       # BB#0:
; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $3, 12(%rdi), %xmm0, %xmm0
; ALL-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $2, 56(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vpinsrd $3, 60(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $2, 56(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vpinsrd $3, 60(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load volatile i32, i32* %ptr0
  %val3 = load volatile i32, i32* %ptr3
  %valC = load volatile i32, i32* %ptrC
  %valE = load volatile i32, i32* %ptrE
  %valF = load volatile i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}
