• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
7
8; Widened shuffle broadcast loads
9
; Loads a <4 x float> and repeats its first two elements (the low 64 bits)
; twice to fill the <4 x float> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <4 x float> @load_splat_4f32_4f32_0101(<4 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_4f32_4f32_0101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_4f32_4f32_0101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_4f32_0101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %ret
}
31
; Loads a <4 x float> and repeats its first two elements (the low 64 bits)
; four times to fill the wider <8 x float> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <8 x float> @load_splat_8f32_4f32_01010101(<4 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_4f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_4f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: load_splat_8f32_4f32_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8f32_4f32_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8f32_4f32_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}
66
; Loads an <8 x float> and repeats its first two elements (the low 64 bits)
; four times to fill the <8 x float> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <8 x float> @load_splat_8f32_8f32_01010101(<8 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_8f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_8f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_8f32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}
90
; Loads a <4 x i32> and repeats its first two elements (the low 64 bits)
; twice to fill the <4 x i32> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <4 x i32> @load_splat_4i32_4i32_0101(<4 x i32>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_4i32_4i32_0101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_4i32_4i32_0101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_4i32_4i32_0101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_4i32_4i32_0101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %ret
}
116
; Loads a <4 x i32> and repeats its first two elements (the low 64 bits)
; four times to fill the wider <8 x i32> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i32_4i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_4i32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}
133
; Loads an <8 x i32> and repeats its first two elements (the low 64 bits)
; four times to fill the <8 x i32> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i32_8i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i32_8i32_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i32_8i32_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i32_8i32_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}
161
; Loads an <8 x i16> and repeats its first two elements (the low 32 bits)
; four times to fill the <8 x i16> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <8 x i16> @load_splat_8i16_8i16_01010101(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i16_8i16_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i16> %ret
}
187
; Loads an <8 x i16> and repeats its first four elements (the low 64 bits)
; twice to fill the <8 x i16> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <8 x i16> @load_splat_8i16_8i16_01230123(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i16_8i16_01230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %ret
}
213
; Loads an <8 x i16> and repeats its first two elements (the low 32 bits)
; eight times to fill the wider <16 x i16> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <16 x i16> @load_splat_16i16_8i16_0101010101010101(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_8i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}
241
; Loads an <8 x i16> and repeats its first four elements (the low 64 bits)
; four times to fill the wider <16 x i16> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_8i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}
258
; Loads a <16 x i16> and repeats its first two elements (the low 32 bits)
; eight times to fill the <16 x i16> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_16i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}
286
; Loads a <16 x i16> and repeats its first four elements (the low 64 bits)
; four times to fill the <16 x i16> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <16 x i16> @load_splat_16i16_16i16_0123012301230123(<16 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_16i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}
303
; Loads a <16 x i8> and repeats its first two elements (the low 16 bits)
; eight times to fill the <16 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <16 x i8> @load_splat_16i8_16i8_0101010101010101(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %ret
}
331
; Loads a <16 x i8> and repeats its first four elements (the low 32 bits)
; four times to fill the <16 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <16 x i8> @load_splat_16i8_16i8_0123012301230123(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %ret
}
357
; Loads a <16 x i8> and repeats its first eight elements (the low 64 bits)
; twice to fill the <16 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <16 x i8> @load_splat_16i8_16i8_0123456701234567(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %ret
}
383
; Loads a <16 x i8> and repeats its first two elements (the low 16 bits)
; sixteen times to fill the wider <32 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <32 x i8> @load_splat_32i8_16i8_01010101010101010101010101010101(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}
413
; Loads a <16 x i8> and repeats its first four elements (the low 32 bits)
; eight times to fill the wider <32 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <32 x i8> @load_splat_32i8_16i8_01230123012301230123012301230123(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}
441
; Loads a <16 x i8> and repeats its first eight elements (the low 64 bits)
; four times to fill the wider <32 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}
458
; Loads a <32 x i8> and repeats its first two elements (the low 16 bits)
; sixteen times to fill the <32 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(<32 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}
488
; Loads a <32 x i8> and repeats its first four elements (the low 32 bits)
; eight times to fill the <32 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(<32 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}
505
; Loads a <32 x i8> and repeats its first eight elements (the low 64 bits)
; four times to fill the <32 x i8> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <32 x i8> @load_splat_32i8_32i8_01234567012345670123456701234567(<32 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}
522
; Loads an <8 x float> and broadcasts element 0 (the all-zero shuffle mask)
; into every lane of the narrower <4 x float> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <4 x float> @load_splat_4f32_8f32_0000(<8 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_4f32_8f32_0000:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_8f32_0000:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %ret
}
539
; Loads a <16 x float> and repeats elements 8 and 9 (64 bits starting at byte
; offset 32 of the source vector) four times to fill the <8 x float> result.
; The body reads memory through %ptr, so the function is marked readonly;
; readnone would be contradicted by the load.
define <8 x float> @load_splat_8f32_16f32_89898989(<16 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_16f32_89898989:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps 32(%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_16f32_89898989:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_16f32_89898989:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd 32(%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x float>, <16 x float>* %ptr
  %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
  ret <8 x float> %ret
}
563
564; PR34394
; Reads a two-element i32 vector from %vp and tiles that pair twice, giving a
; <4 x i32> result.
define <4 x i32> @load_splat_4i32_2i32_0101(<2 x i32>* %vp) {
; SSE-LABEL: load_splat_4i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_4i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_4i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_4i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
  %pair = load <2 x i32>, <2 x i32>* %vp
  %tiled = shufflevector <2 x i32> %pair, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %tiled
}
591
; Reads a two-element i32 vector from %vp and tiles that pair four times,
; giving an <8 x i32> result.
define <8 x i32> @load_splat_8i32_2i32_0101(<2 x i32>* %vp) {
; SSE-LABEL: load_splat_8i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX512-NEXT:    retq
  %pair = load <2 x i32>, <2 x i32>* %vp
  %tiled = shufflevector <2 x i32> %pair, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %tiled
}
620
; Reads a two-element i32 vector from %vp and tiles that pair eight times,
; giving a <16 x i32> result.
define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) {
; SSE-LABEL: load_splat_16i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    vmovaps %ymm0, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; AVX512-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %pair = load <2 x i32>, <2 x i32>* %vp
  %tiled = shufflevector <2 x i32> %pair, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i32> %tiled
}
655