• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
2
; Splat of a loaded i64 into all four lanes of a <4 x i64>. The expected
; pattern is a vbroadcastsd that takes its source straight from memory.
; The function reads through %ptr, so it is marked readonly rather than
; readnone.
; CHECK: vbroadcastsd (%
define <4 x i64> @A(i64* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load i64* %ptr, align 8
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}
13
; Loaded i32 inserted into lanes 0-3 of a <8 x i32>; lanes 4-7 are never
; written and stay undef. The expected pattern is a memory-operand
; vbroadcastss. The function reads through %ptr, so it is marked readonly
; rather than readnone.
; CHECK: vbroadcastss (%
define <8 x i32> @B(i32* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  ret <8 x i32> %vecinit6.i
}
24
; Splat of a loaded double into all four lanes of a <4 x double>. The
; expected pattern is a memory-operand vbroadcastsd. The function reads
; through %ptr, so it is marked readonly rather than readnone.
; CHECK: vbroadcastsd (%
define <4 x double> @C(double* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load double* %ptr, align 8
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}
35
; Loaded float inserted into lanes 0-3 of a <8 x float>; lanes 4-7 are never
; written and stay undef. The expected pattern is a memory-operand
; vbroadcastss. The function reads through %ptr, so it is marked readonly
; rather than readnone.
; CHECK: vbroadcastss (%
define <8 x float> @D(float* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  ret <8 x float> %vecinit6.i
}
46
;;;; 128-bit versions
48
; 128-bit case: splat of a loaded float into all four lanes of a
; <4 x float>. The expected pattern is a memory-operand vbroadcastss.
; The function reads through %ptr, so it is marked readonly rather than
; readnone.
; CHECK: vbroadcastss (%
define <4 x float> @e(float* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}
59
60
; Splat of a floating-point constant (no load involved) into a <4 x float>.
; The directives below require that no vbroadcastss is emitted between the
; function label and its return. %ptr is unused.
; CHECK: _e2
; CHECK-NOT: vbroadcastss
; CHECK: ret
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
  %lane0 = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %lane1 = insertelement <4 x float> %lane0, float 0xbf80000000000000, i32 1
  %lane2 = insertelement <4 x float> %lane1, float 0xbf80000000000000, i32 2
  %splat = insertelement <4 x float> %lane2, float 0xbf80000000000000, i32 3
  ret <4 x float> %splat
}
71
72
; 128-bit integer case: splat of a loaded i32 into all four lanes of a
; <4 x i32>. The expected pattern is a memory-operand vbroadcastss.
; The function reads through %ptr, so it is marked readonly rather than
; readnone.
; CHECK: vbroadcastss (%
define <4 x i32> @F(i32* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load i32* %ptr, align 4
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}
83
; Unsupported vbroadcasts
85
; Splat of a loaded i64 into both lanes of a <2 x i64>. The directives
; require that no memory-operand broadcast appears between the function
; label and its return. The function reads through %ptr, so it is marked
; readonly rather than readnone.
; CHECK: _G
; CHECK-NOT: broadcast (%
; CHECK: ret
define <2 x i64> @G(i64* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}
96
; Register-to-register shuffle that moves element 1 of %a into lane 0 and
; leaves the remaining lanes undef. The directives require that no broadcast
; instruction is emitted for it.
; CHECK: _H
; CHECK-NOT: broadcast
; CHECK: ret
define <4 x i32> @H(<4 x i32> %a) {
  %elt1_in_lane0 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %elt1_in_lane0
}
104
; Splat of a loaded double into both lanes of a <2 x double>. The directives
; require that no memory-operand broadcast appears between the function
; label and its return. The function reads through %ptr, so it is marked
; readonly rather than readnone.
; NOTE(review): the load uses align 4, below the natural alignment of
; double -- unclear whether that is intentional; left unchanged.
; CHECK: _I
; CHECK-NOT: broadcast (%
; CHECK: ret
define <2 x double> @I(double* %ptr) nounwind uwtable readonly ssp {
entry:
  %q = load double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}
115
; Splat of a loaded float into a <4 x float>, with an unrelated load/store
; pair in the same block. A memory-operand vbroadcastss is still expected.
; The function both loads and stores, so it carries no readnone/readonly
; attribute.
; CHECK: _RR
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable ssp {
entry:
  %q = load float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; force a chain: an extra load from %k and a store of the loaded value
  %j = load i32* %k, align 4
  store i32 %j, i32* undef
  ret <4 x float> %vecinit6.i
}
131
132
; Splat written as insertelement into lane 0 followed by a shufflevector
; with an all-zero mask. A memory-operand vbroadcastss is expected.
; %k is unused. The function reads through %ptr, so it is marked readonly
; rather than readnone.
; CHECK: _RR2
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readonly ssp {
entry:
  %q = load float* %ptr, align 4
  %v = insertelement <4 x float> undef, float %q, i32 0
  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %t
}
143
144
; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
148
; One 4-lane float splat, duplicated into both halves of an 8-lane result by
; a shufflevector whose mask repeats indices 0-3. A single memory-operand
; vbroadcastss directly before the return is expected, with no vinsertf128.
; CHECK-LABEL: splat_concat1
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastss (%
; CHECK-NEXT: ret
define <8 x float> @splat_concat1(float* %p) {
  %scalar = load float* %p, align 4
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %scalar, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %scalar, i32 2
  %half = insertelement <4 x float> %lane2, float %scalar, i32 3
  %wide = shufflevector <4 x float> %half, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %wide
}
162
; Two separately built 4-lane float splats of the same loaded scalar,
; concatenated into an 8-lane result by a shufflevector with mask 0-7.
; A single memory-operand vbroadcastss directly before the return is
; expected, with no vinsertf128.
; CHECK-LABEL: splat_concat2
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastss (%
; CHECK-NEXT: ret
define <8 x float> @splat_concat2(float* %p) {
  %scalar = load float* %p, align 4
  ; low half
  %lo0 = insertelement <4 x float> undef, float %scalar, i32 0
  %lo1 = insertelement <4 x float> %lo0, float %scalar, i32 1
  %lo2 = insertelement <4 x float> %lo1, float %scalar, i32 2
  %lo = insertelement <4 x float> %lo2, float %scalar, i32 3
  ; high half
  %hi0 = insertelement <4 x float> undef, float %scalar, i32 0
  %hi1 = insertelement <4 x float> %hi0, float %scalar, i32 1
  %hi2 = insertelement <4 x float> %hi1, float %scalar, i32 2
  %hi = insertelement <4 x float> %hi2, float %scalar, i32 3
  %wide = shufflevector <4 x float> %lo, <4 x float> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %wide
}
180
; One 2-lane double splat, duplicated into both halves of a 4-lane result by
; a shufflevector whose mask repeats indices 0-1. A single memory-operand
; vbroadcastsd directly before the return is expected, with no vinsertf128.
; CHECK-LABEL: splat_concat3
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastsd (%
; CHECK-NEXT: ret
define <4 x double> @splat_concat3(double* %p) {
  %scalar = load double* %p, align 8
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %half = insertelement <2 x double> %lane0, double %scalar, i32 1
  %wide = shufflevector <2 x double> %half, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %wide
}
192
; Two separately built 2-lane double splats of the same loaded scalar,
; concatenated into a 4-lane result by a shufflevector with mask 0-3.
; A single memory-operand vbroadcastsd directly before the return is
; expected, with no vinsertf128.
; CHECK-LABEL: splat_concat4
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastsd (%
; CHECK-NEXT: ret
define <4 x double> @splat_concat4(double* %p) {
  %1 = load double* %p, align 8
  ; first 2-lane splat
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  ; second 2-lane splat: %5 extends %4 (not %2) so that it is an independent
  ; vector and %4 is not dead
  %4 = insertelement <2 x double> undef, double %1, i32 0
  %5 = insertelement <2 x double> %4, double %1, i32 1
  %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %6
}
206
207