; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx    | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2   | FileCheck %s --check-prefixes=AVX,AVX2
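; A scalar load inserted into an undef vector leaves every other lane undef,
; so no zeroing or shuffling of those lanes is required. Insertion into
; element 0 should therefore lower to a plain scalar-to-vector move.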

define <16 x i8> @load8_ins_elt0_v16i8(i8* %p) nounwind {
; SSE-LABEL: load8_ins_elt0_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: load8_ins_elt0_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load8_ins_elt0_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <16 x i8> undef, i8 %x, i32 0
  ret <16 x i8> %ins
}

define <8 x i16> @load16_ins_elt0_v8i16(i16* %p) nounwind {
; SSE-LABEL: load16_ins_elt0_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: load16_ins_elt0_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load16_ins_elt0_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <8 x i16> undef, i16 %x, i32 0
  ret <8 x i16> %ins
}

define <4 x i32> @load32_ins_elt0_v4i32(i32* %p) nounwind {
; SSE-LABEL: load32_ins_elt0_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_elt0_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <4 x i32> undef, i32 %x, i32 0
  ret <4 x i32> %ins
}

define <2 x i64> @load64_ins_elt0_v2i64(i64* %p) nounwind {
; SSE-LABEL: load64_ins_elt0_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_elt0_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <2 x i64> undef, i64 %x, i32 0
  ret <2 x i64> %ins
}

define <4 x float> @load32_ins_elt0_v4f32(float* %p) nounwind {
; SSE-LABEL: load32_ins_elt0_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_elt0_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <4 x float> undef, float %x, i32 0
  ret <4 x float> %ins
}

define <2 x double> @load64_ins_elt0_v2f64(double* %p) nounwind {
; SSE-LABEL: load64_ins_elt0_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_elt0_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <2 x double> undef, double %x, i32 0
  ret <2 x double> %ins
}

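; Insertion into a nonzero element: because the remaining lanes are undef, a
; splat of the loaded value is an equally valid lowering, so targets with a
; broadcast (or dup) from memory can avoid the move-plus-shift/shuffle.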
define <16 x i8> @load8_ins_eltc_v16i8(i8* %p) nounwind {
; SSE-LABEL: load8_ins_eltc_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pslld $24, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: load8_ins_eltc_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load8_ins_eltc_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <16 x i8> undef, i8 %x, i32 3
  ret <16 x i8> %ins
}

define <8 x i16> @load16_ins_eltc_v8i16(i16* %p) nounwind {
; SSE-LABEL: load16_ins_eltc_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load16_ins_eltc_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load16_ins_eltc_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <8 x i16> undef, i16 %x, i32 5
  ret <8 x i16> %ins
}

define <4 x i32> @load32_ins_eltc_v4i32(i32* %p) nounwind {
; SSE-LABEL: load32_ins_eltc_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_eltc_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <4 x i32> undef, i32 %x, i32 2
  ret <4 x i32> %ins
}

define <2 x i64> @load64_ins_eltc_v2i64(i64* %p) nounwind {
; SSE-LABEL: load64_ins_eltc_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_eltc_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <2 x i64> undef, i64 %x, i32 1
  ret <2 x i64> %ins
}

define <4 x float> @load32_ins_eltc_v4f32(float* %p) nounwind {
; SSE-LABEL: load32_ins_eltc_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_eltc_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <4 x float> undef, float %x, i32 3
  ret <4 x float> %ins
}

define <2 x double> @load64_ins_eltc_v2f64(double* %p) nounwind {
; SSE-LABEL: load64_ins_eltc_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_eltc_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <2 x double> undef, double %x, i32 1
  ret <2 x double> %ins
}

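; 256-bit destinations, insertion into element 0: only the low element is
; defined, so the same scalar-to-vector moves (or an AVX2 broadcast) suffice.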
define <32 x i8> @load8_ins_elt0_v32i8(i8* %p) nounwind {
; SSE-LABEL: load8_ins_elt0_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: load8_ins_elt0_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load8_ins_elt0_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX2-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <32 x i8> undef, i8 %x, i32 0
  ret <32 x i8> %ins
}

define <16 x i16> @load16_ins_elt0_v16i16(i16* %p) nounwind {
; SSE-LABEL: load16_ins_elt0_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: load16_ins_elt0_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load16_ins_elt0_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <16 x i16> undef, i16 %x, i32 0
  ret <16 x i16> %ins
}

define <8 x i32> @load32_ins_elt0_v8i32(i32* %p) nounwind {
; SSE-LABEL: load32_ins_elt0_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_elt0_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <8 x i32> undef, i32 %x, i32 0
  ret <8 x i32> %ins
}

define <4 x i64> @load64_ins_elt0_v4i64(i64* %p) nounwind {
; SSE-LABEL: load64_ins_elt0_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_elt0_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <4 x i64> undef, i64 %x, i32 0
  ret <4 x i64> %ins
}

define <8 x float> @load32_ins_elt0_v8f32(float* %p) nounwind {
; SSE-LABEL: load32_ins_elt0_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_elt0_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <8 x float> undef, float %x, i32 0
  ret <8 x float> %ins
}

define <4 x double> @load64_ins_elt0_v4f64(double* %p) nounwind {
; SSE-LABEL: load64_ins_elt0_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_elt0_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <4 x double> undef, double %x, i32 0
  ret <4 x double> %ins
}

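; 256-bit destinations, insertion into a nonzero element: a full-width
; broadcast of the loaded value again covers the single defined lane.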
define <32 x i8> @load8_ins_eltc_v32i8(i8* %p) nounwind {
; SSE-LABEL: load8_ins_eltc_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    psllq $40, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load8_ins_eltc_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpsllq $40, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load8_ins_eltc_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX2-NEXT:    retq
  %x = load i8, i8* %p
  %ins = insertelement <32 x i8> undef, i8 %x, i32 21
  ret <32 x i8> %ins
}

define <16 x i16> @load16_ins_eltc_v16i16(i16* %p) nounwind {
; SSE-LABEL: load16_ins_eltc_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    psllq $48, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load16_ins_eltc_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load16_ins_eltc_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
  %x = load i16, i16* %p
  %ins = insertelement <16 x i16> undef, i16 %x, i32 11
  ret <16 x i16> %ins
}

define <8 x i32> @load32_ins_eltc_v8i32(i32* %p) nounwind {
; SSE-LABEL: load32_ins_eltc_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_eltc_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %ins = insertelement <8 x i32> undef, i32 %x, i32 7
  ret <8 x i32> %ins
}

define <4 x i64> @load64_ins_eltc_v4i64(i64* %p) nounwind {
; SSE-LABEL: load64_ins_eltc_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_eltc_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %ins = insertelement <4 x i64> undef, i64 %x, i32 3
  ret <4 x i64> %ins
}

define <8 x float> @load32_ins_eltc_v8f32(float* %p) nounwind {
; SSE-LABEL: load32_ins_eltc_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: load32_ins_eltc_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %ins = insertelement <8 x float> undef, float %x, i32 5
  ret <8 x float> %ins
}

define <4 x double> @load64_ins_eltc_v4f64(double* %p) nounwind {
; SSE-LABEL: load64_ins_eltc_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load64_ins_eltc_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %ins = insertelement <4 x double> undef, double %x, i32 3
  ret <4 x double> %ins
}