; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

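; These tests check that a 128-bit vector load that is duplicated into both
; 128-bit halves of a 256-bit vector (via shufflevector) is lowered to a
; single vbroadcastf128/vbroadcasti128 from memory when AVX2 is available.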
define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
; X32-LABEL: test_broadcast_2f64_4f64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vaddpd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double> *%p
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = fadd <4 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0>
 ret <4 x double> %3
}

define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
; X32-LABEL: test_broadcast_2i64_4i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddq {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64> *%p
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = add <4 x i64> %2, <i64 1, i64 2, i64 3, i64 4>
 ret <4 x i64> %3
}

define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
; X32-LABEL: test_broadcast_4f32_8f32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float> *%p
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = fadd <8 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
 ret <8 x float> %3
}

define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
; X32-LABEL: test_broadcast_4i32_8i32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32> *%p
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = add <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ret <8 x i32> %3
}

define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddw {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %3  = add <16 x i16> %2, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>
 ret <16 x i16> %3
}

define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddb {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %3 = add <32 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32>
 ret <32 x i8> %3
}

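; The *_reuse variants also store the loaded 128-bit value, so the input must
; stay live in an xmm register; the broadcast is then built with a 128-bit
; load plus vinsertf128/vinserti128 instead of a broadcast from memory.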
define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
; X32-LABEL: test_broadcast_2f64_4f64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovapd (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vaddpd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovapd %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovapd (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovapd %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double>* %p0
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = fadd <4 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0>
 store <2 x double> %1, <2 x double>* %p1
 ret <4 x double> %3
}

define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
; X32-LABEL: test_broadcast_2i64_4i64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddq {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64>* %p0
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = add <4 x i64> %2, <i64 1, i64 2, i64 3, i64 4>
 store <2 x i64> %1, <2 x i64>* %p1
 ret <4 x i64> %3
}

define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
; X32-LABEL: test_broadcast_4f32_8f32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vaddps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float>* %p0
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = fadd <8 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
 store <4 x float> %1, <4 x float>* %p1
 ret <8 x float> %3
}

define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
; X32-LABEL: test_broadcast_4i32_8i32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = add <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 store <4 x i32> %1, <4 x i32>* %p1
 ret <8 x i32> %3
}

define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddw {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %3  = add <16 x i16> %2, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>
 store <8 x i16> %1, <8 x i16>* %p1
 ret <16 x i16> %3
}

define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddb {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p0
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %3 = add <32 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32>
 store <16 x i8> %1, <16 x i8>* %p1
 ret <32 x i8> %3
}

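; PR29088: the subvector broadcast should still be formed even though an
; unrelated zero store is emitted between the load and the shuffle.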
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32-LABEL: PR29088:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovaps %ymm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: PR29088:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovaps %ymm1, (%rsi)
; X64-NEXT:    retq
  %ld = load <4 x i32>, <4 x i32>* %p0
  store <8 x float> zeroinitializer, <8 x float>* %p1
  %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shuf
}