; Codegen tests for vector splat / lane-duplicate patterns on arm64.
1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
2
; Splat of scalar %A into a <8 x i8>, built one lane at a time with
; insertelement. Expected to select a dup.8b.
define <8 x i8> @v_dup8(i8 %A) nounwind {
; CHECK-LABEL: v_dup8:
; CHECK: dup.8b
  %lane0 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
  %lane1 = insertelement <8 x i8> %lane0, i8 %A, i32 1
  %lane2 = insertelement <8 x i8> %lane1, i8 %A, i32 2
  %lane3 = insertelement <8 x i8> %lane2, i8 %A, i32 3
  %lane4 = insertelement <8 x i8> %lane3, i8 %A, i32 4
  %lane5 = insertelement <8 x i8> %lane4, i8 %A, i32 5
  %lane6 = insertelement <8 x i8> %lane5, i8 %A, i32 6
  %lane7 = insertelement <8 x i8> %lane6, i8 %A, i32 7
  ret <8 x i8> %lane7
}
16
; Splat of scalar %A into a <4 x i16> via an insertelement chain.
; Expected to select a dup.4h.
define <4 x i16> @v_dup16(i16 %A) nounwind {
; CHECK-LABEL: v_dup16:
; CHECK: dup.4h
  %lane0 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
  %lane1 = insertelement <4 x i16> %lane0, i16 %A, i32 1
  %lane2 = insertelement <4 x i16> %lane1, i16 %A, i32 2
  %lane3 = insertelement <4 x i16> %lane2, i16 %A, i32 3
  ret <4 x i16> %lane3
}
26
; Splat of scalar %A into a <2 x i32> via an insertelement chain.
; Expected to select a dup.2s.
define <2 x i32> @v_dup32(i32 %A) nounwind {
; CHECK-LABEL: v_dup32:
; CHECK: dup.2s
  %lane0 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
  %lane1 = insertelement <2 x i32> %lane0, i32 %A, i32 1
  ret <2 x i32> %lane1
}
34
; Splat of float %A into a <2 x float> via an insertelement chain.
; Expected to select a dup.2s.
define <2 x float> @v_dupfloat(float %A) nounwind {
; CHECK-LABEL: v_dupfloat:
; CHECK: dup.2s
  %lane0 = insertelement <2 x float> zeroinitializer, float %A, i32 0
  %lane1 = insertelement <2 x float> %lane0, float %A, i32 1
  ret <2 x float> %lane1
}
42
; 128-bit variant: splat of scalar %A into all sixteen lanes of a <16 x i8>
; via an insertelement chain. Expected to select a dup.16b.
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
; CHECK-LABEL: v_dupQ8:
; CHECK: dup.16b
  %lane0 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
  %lane1 = insertelement <16 x i8> %lane0, i8 %A, i32 1
  %lane2 = insertelement <16 x i8> %lane1, i8 %A, i32 2
  %lane3 = insertelement <16 x i8> %lane2, i8 %A, i32 3
  %lane4 = insertelement <16 x i8> %lane3, i8 %A, i32 4
  %lane5 = insertelement <16 x i8> %lane4, i8 %A, i32 5
  %lane6 = insertelement <16 x i8> %lane5, i8 %A, i32 6
  %lane7 = insertelement <16 x i8> %lane6, i8 %A, i32 7
  %lane8 = insertelement <16 x i8> %lane7, i8 %A, i32 8
  %lane9 = insertelement <16 x i8> %lane8, i8 %A, i32 9
  %lane10 = insertelement <16 x i8> %lane9, i8 %A, i32 10
  %lane11 = insertelement <16 x i8> %lane10, i8 %A, i32 11
  %lane12 = insertelement <16 x i8> %lane11, i8 %A, i32 12
  %lane13 = insertelement <16 x i8> %lane12, i8 %A, i32 13
  %lane14 = insertelement <16 x i8> %lane13, i8 %A, i32 14
  %lane15 = insertelement <16 x i8> %lane14, i8 %A, i32 15
  ret <16 x i8> %lane15
}
64
; 128-bit variant: splat of scalar %A into a <8 x i16> via an insertelement
; chain. Expected to select a dup.8h.
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
; CHECK-LABEL: v_dupQ16:
; CHECK: dup.8h
  %lane0 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
  %lane1 = insertelement <8 x i16> %lane0, i16 %A, i32 1
  %lane2 = insertelement <8 x i16> %lane1, i16 %A, i32 2
  %lane3 = insertelement <8 x i16> %lane2, i16 %A, i32 3
  %lane4 = insertelement <8 x i16> %lane3, i16 %A, i32 4
  %lane5 = insertelement <8 x i16> %lane4, i16 %A, i32 5
  %lane6 = insertelement <8 x i16> %lane5, i16 %A, i32 6
  %lane7 = insertelement <8 x i16> %lane6, i16 %A, i32 7
  ret <8 x i16> %lane7
}
78
; 128-bit variant: splat of scalar %A into a <4 x i32> via an insertelement
; chain. Expected to select a dup.4s.
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
; CHECK-LABEL: v_dupQ32:
; CHECK: dup.4s
  %lane0 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %A, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %A, i32 2
  %lane3 = insertelement <4 x i32> %lane2, i32 %A, i32 3
  ret <4 x i32> %lane3
}
88
; 128-bit variant: splat of float %A into a <4 x float> via an insertelement
; chain. Expected to select a dup.4s.
define <4 x float> @v_dupQfloat(float %A) nounwind {
; CHECK-LABEL: v_dupQfloat:
; CHECK: dup.4s
  %lane0 = insertelement <4 x float> zeroinitializer, float %A, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %A, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %A, i32 2
  %lane3 = insertelement <4 x float> %lane2, float %A, i32 3
  ret <4 x float> %lane3
}
98
99; Check to make sure it works with shuffles, too.
100
; Splat of %A written as an insertelement into lane 0 followed by an
; all-zero-mask shufflevector. Expected to select a dup.8b.
define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
; CHECK-LABEL: v_shuffledup8:
; CHECK: dup.8b
  %seed = insertelement <8 x i8> undef, i8 %A, i32 0
  %splat = shufflevector <8 x i8> %seed, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}
108
; Splat of %A written as an insertelement into lane 0 followed by an
; all-zero-mask shufflevector. Expected to select a dup.4h.
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
; CHECK-LABEL: v_shuffledup16:
; CHECK: dup.4h
  %seed = insertelement <4 x i16> undef, i16 %A, i32 0
  %splat = shufflevector <4 x i16> %seed, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}
116
; Splat of %A written as an insertelement into lane 0 followed by an
; all-zero-mask shufflevector. Expected to select a dup.2s.
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
; CHECK-LABEL: v_shuffledup32:
; CHECK: dup.2s
  %seed = insertelement <2 x i32> undef, i32 %A, i32 0
  %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}
124
; Splat of float %A written as an insertelement into lane 0 followed by an
; all-zero-mask shufflevector. Expected to select a dup.2s.
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
; CHECK-LABEL: v_shuffledupfloat:
; CHECK: dup.2s
  %seed = insertelement <2 x float> undef, float %A, i32 0
  %splat = shufflevector <2 x float> %seed, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}
132
; 128-bit variant: insertelement into lane 0 followed by an all-zero-mask
; shufflevector over <16 x i8>. Expected to select a dup.16b.
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
; CHECK-LABEL: v_shuffledupQ8:
; CHECK: dup.16b
  %seed = insertelement <16 x i8> undef, i8 %A, i32 0
  %splat = shufflevector <16 x i8> %seed, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
140
; 128-bit variant: insertelement into lane 0 followed by an all-zero-mask
; shufflevector over <8 x i16>. Expected to select a dup.8h.
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
; CHECK-LABEL: v_shuffledupQ16:
; CHECK: dup.8h
  %seed = insertelement <8 x i16> undef, i16 %A, i32 0
  %splat = shufflevector <8 x i16> %seed, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
148
; 128-bit variant: insertelement into lane 0 followed by an all-zero-mask
; shufflevector over <4 x i32>. Expected to select a dup.4s.
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
; CHECK-LABEL: v_shuffledupQ32:
; CHECK: dup.4s
  %seed = insertelement <4 x i32> undef, i32 %A, i32 0
  %splat = shufflevector <4 x i32> %seed, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
156
; 128-bit variant: insertelement into lane 0 followed by an all-zero-mask
; shufflevector over <4 x float>. Expected to select a dup.4s.
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
; CHECK-LABEL: v_shuffledupQfloat:
; CHECK: dup.4s
  %seed = insertelement <4 x float> undef, float %A, i32 0
  %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
164
; Loads a <8 x i8> from %A and replicates its lane 1 into every lane.
; Expected to select a dup.8b.
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vduplane8:
; CHECK: dup.8b
  %vec = load <8 x i8>, <8 x i8>* %A
  %bcast = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i8> %bcast
}
172
; Loads a <4 x i16> from %A and replicates its lane 1 into every lane.
; Expected to select a dup.4h.
define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vduplane16:
; CHECK: dup.4h
  %vec = load <4 x i16>, <4 x i16>* %A
  %bcast = shufflevector <4 x i16> %vec, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i16> %bcast
}
180
; Loads a <2 x i32> from %A and replicates its lane 1 into both lanes.
; Expected to select a dup.2s.
define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vduplane32:
; CHECK: dup.2s
  %vec = load <2 x i32>, <2 x i32>* %A
  %bcast = shufflevector <2 x i32> %vec, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %bcast
}
188
; Loads a <2 x float> from %A and replicates its lane 1 into both lanes.
; Expected to select a dup.2s.
define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
; CHECK-LABEL: vduplanefloat:
; CHECK: dup.2s
  %vec = load <2 x float>, <2 x float>* %A
  %bcast = shufflevector <2 x float> %vec, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x float> %bcast
}
196
; Loads a 64-bit <8 x i8> from %A and replicates its lane 1 into a 128-bit
; <16 x i8> result. Expected to select a dup.16b.
define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vduplaneQ8:
; CHECK: dup.16b
  %vec = load <8 x i8>, <8 x i8>* %A
  %bcast = shufflevector <8 x i8> %vec, <8 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %bcast
}
204
; Loads a 64-bit <4 x i16> from %A and replicates its lane 1 into a 128-bit
; <8 x i16> result. Expected to select a dup.8h.
define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vduplaneQ16:
; CHECK: dup.8h
  %vec = load <4 x i16>, <4 x i16>* %A
  %bcast = shufflevector <4 x i16> %vec, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %bcast
}
212
; Loads a 64-bit <2 x i32> from %A and replicates its lane 1 into a 128-bit
; <4 x i32> result. Expected to select a dup.4s.
define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vduplaneQ32:
; CHECK: dup.4s
  %vec = load <2 x i32>, <2 x i32>* %A
  %bcast = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %bcast
}
220
; Loads a 64-bit <2 x float> from %A and replicates its lane 1 into a 128-bit
; <4 x float> result. Expected to select a dup.4s.
define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
; CHECK-LABEL: vduplaneQfloat:
; CHECK: dup.4s
  %vec = load <2 x float>, <2 x float>* %A
  %bcast = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %bcast
}
228
; Replicates lane 1 of a <2 x i64> argument into both lanes.
; Expected to select a dup.2d.
define <2 x i64> @foo(<2 x i64> %src) nounwind readnone {
; CHECK-LABEL: foo:
; CHECK: dup.2d
entry:
  %bcast = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %bcast
}
236
; Replicates lane 0 of a <2 x i64> argument into both lanes.
; Expected to select a dup.2d.
define <2 x i64> @bar(<2 x i64> %src) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: dup.2d
entry:
  %bcast = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %bcast
}
244
; Replicates lane 1 of a <2 x double> argument into both lanes.
; Expected to select a dup.2d.
define <2 x double> @baz(<2 x double> %src) nounwind readnone {
; CHECK-LABEL: baz:
; CHECK: dup.2d
entry:
  %bcast = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %bcast
}
252
; Replicates lane 0 of a <2 x double> argument into both lanes.
; Expected to select a dup.2d.
define <2 x double> @qux(<2 x double> %src) nounwind readnone {
; CHECK-LABEL: qux:
; CHECK: dup.2d
entry:
  %bcast = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %bcast
}
260
; Builds a <2 x i32> from two different scalars, so it is not a splat.
; The expected sequence is an fmov of %a into lane 0, then an ins of %b
; into lane 1, then the return.
define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone {
; CHECK-LABEL: f:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ret
  %lo = insertelement <2 x i32> undef, i32 %a, i32 0
  %pair = insertelement <2 x i32> %lo, i32 %b, i32 1
  ret <2 x i32> %pair
}
270
; Builds a <4 x i32> with lanes (%a, %b, %b, %a). The expected sequence is an
; fmov for lane 0 followed by one ins per remaining lane, then the return.
define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
; CHECK-LABEL: g:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ins.s v0[2], w1
; CHECK-NEXT: ins.s v0[3], w0
; CHECK-NEXT: ret
  %l0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %l1 = insertelement <4 x i32> %l0, i32 %b, i32 1
  %l2 = insertelement <4 x i32> %l1, i32 %b, i32 2
  %l3 = insertelement <4 x i32> %l2, i32 %a, i32 3
  ret <4 x i32> %l3
}
284
; Builds a <2 x i64> from two different scalars. The expected sequence is an
; fmov of %a into lane 0, then an ins of %b into lane 1, then the return.
define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
; CHECK-LABEL: h:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: ins.d v0[1], x1
; CHECK-NEXT: ret
  %lo = insertelement <2 x i64> undef, i64 %a, i32 0
  %pair = insertelement <2 x i64> %lo, i64 %b, i32 1
  ret <2 x i64> %pair
}
294
; We used to spot this as a BUILD_VECTOR implementable by dup, but assumed that
; the single value needed was of the same type as the vector's elements. This
; is false if the scalar corresponding to the vector type is illegal (e.g. a
; <4 x i16> BUILD_VECTOR will have an i32 as its source). In that case, the
; operation is not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed.
;
; *However*, it is a dup vD.4h, vN.h[2*idx]: extracting i32 lane 3 below is
; expected to become a dup of h-lane 6.
define <4 x i16> @test_build_illegal(<4 x i32> %in) {
; CHECK-LABEL: test_build_illegal:
; CHECK: dup.4h v0, v0[6]
  %val = extractelement <4 x i32> %in, i32 3
  %smallval = trunc i32 %val to i16
  %vec = insertelement <4 x i16> undef, i16 %smallval, i32 3
  ret <4 x i16> %vec
}
311
; Splat of lane 7 of a <8 x i16> feeding a multiply-subtract.
; SelectionDAGBuilder used to hand over a v4i16 that had already been through
; extract_subvector; a DUPLANE was then stacked on top of it, which blocked
; formation of the MLS indexed by lane 7.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm it is defined or tolerated by the parser.
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
; CHECK-LABEL: test_high_splat:
; CHECK: mls.4h v0, v1, v2[7]
entry:
  %splat7 = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat7, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
324
325; Also test the DUP path in the PerfectShuffle generator.
326
; Two-input <4 x i16> shuffle with mask <0, 0, 4, 5>: lane 0 of %a twice,
; then lanes 0 and 1 of %b. Expected to lower to a dup of %a lane 0 followed
; by an ext. The trailing ret is pinned as well, matching the sibling
; dupext tests, so that no extra instruction can follow the ext unnoticed.
; CHECK-LABEL: test_perfectshuffle_dupext_v4i16:
; CHECK-NEXT: dup.4h v0, v0[0]
; CHECK-NEXT: ext.8b v0, v0, v1, #4
; CHECK-NEXT: ret
define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
  %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
  ret <4 x i16> %r
}
334
; Two-input <4 x half> shuffle with mask <0, 0, 4, 5>: lane 0 of %a twice,
; then lanes 0 and 1 of %b. Expected to lower to a dup of %a lane 0 followed
; by an ext and the return.
define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
; CHECK-LABEL: test_perfectshuffle_dupext_v4f16:
; CHECK-NEXT: dup.4h v0, v0[0]
; CHECK-NEXT: ext.8b v0, v0, v1, #4
; CHECK-NEXT: ret
  %mixed = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
  ret <4 x half> %mixed
}
343
; Two-input <4 x i32> shuffle with mask <0, 0, 4, 5>: lane 0 of %a twice,
; then lanes 0 and 1 of %b. Expected to lower to a dup of %a lane 0 followed
; by an ext and the return.
define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; CHECK-LABEL: test_perfectshuffle_dupext_v4i32:
; CHECK-NEXT: dup.4s v0, v0[0]
; CHECK-NEXT: ext.16b v0, v0, v1, #8
; CHECK-NEXT: ret
  %mixed = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
  ret <4 x i32> %mixed
}
352
; Two-input <4 x float> shuffle with mask <0, 0, 4, 5>: lane 0 of %a twice,
; then lanes 0 and 1 of %b. Expected to lower to a dup of %a lane 0 followed
; by an ext and the return.
define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: test_perfectshuffle_dupext_v4f32:
; CHECK-NEXT: dup.4s v0, v0[0]
; CHECK-NEXT: ext.16b v0, v0, v1, #8
; CHECK-NEXT: ret
  %mixed = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
  ret <4 x float> %mixed
}
361