; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

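; Loading a 128-bit vector and splatting it into both halves of a 256-bit
; vector should fold to a single vbroadcastf128 of the memory operand.
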
define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
; X32-LABEL: test_broadcast_2f64_4f64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double> *%p
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ret <4 x double> %2
}

define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
; X32-LABEL: test_broadcast_2i64_4i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64> *%p
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ret <4 x i64> %2
}

define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
; X32-LABEL: test_broadcast_4f32_8f32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float> *%p
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 ret <8 x float> %2
}

define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
; X32-LABEL: test_broadcast_4i32_8i32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32> *%p
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 ret <8 x i32> %2
}

define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ret <16 x i16> %2
}

define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ret <32 x i8> %2
}

; PR38949 - https://bugs.llvm.org/show_bug.cgi?id=38949
; Don't limit the transform based on extra uses of the load itself (the store is a user of the load's chain value).

define void @subv_reuse_is_ok(<4 x float>* %a, <8 x float>* %b) {
; X32-LABEL: subv_reuse_is_ok:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: subv_reuse_is_ok:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovups %ymm0, (%rsi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ld = load <4 x float>, <4 x float>* %a, align 1
  %splat128 = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  store <8 x float> %splat128, <8 x float>* %b, align 16
  ret void
}

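; If the loaded 128-bit value has a direct use of its own (here it is also
; stored), it stays in a register, so the 256-bit result is built with
; vinsertf128 from that register instead of rebroadcasting from memory.
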
define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
; X32-LABEL: test_broadcast_2f64_4f64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double>* %p0
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 store <2 x double> %1, <2 x double>* %p1
 ret <4 x double> %2
}

define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
; X32-LABEL: test_broadcast_2i64_4i64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64>* %p0
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 store <2 x i64> %1, <2 x i64>* %p1
 ret <4 x i64> %2
}

define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
; X32-LABEL: test_broadcast_4f32_8f32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float>* %p0
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 store <4 x float> %1, <4 x float>* %p1
 ret <8 x float> %2
}

define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
; X32-LABEL: test_broadcast_4i32_8i32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 store <4 x i32> %1, <4 x i32>* %p1
 ret <8 x i32> %2
}

define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 store <8 x i16> %1, <8 x i16>* %p1
 ret <16 x i16> %2
}

define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p0
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 store <16 x i8> %1, <16 x i8>* %p1
 ret <32 x i8> %2
}

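; PR29088 - https://bugs.llvm.org/show_bug.cgi?id=29088
; An unrelated store of zeros between the load and the splat shuffle should
; not prevent folding the load into vbroadcastf128.
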
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32-LABEL: PR29088:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovaps %ymm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: PR29088:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovaps %ymm1, (%rsi)
; X64-NEXT:    retq
  %ld = load <4 x i32>, <4 x i32>* %p0
  store <8 x float> zeroinitializer, <8 x float>* %p1
  %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shuf
}