; Codegen test: splatting a scalar across fixed-length vectors with AArch64 SVE.
; RUN: llc -aarch64-sve-vector-bits-min=128  < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384  < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048

; Each run line above fixes a minimum SVE register width and selects the check
; prefixes whose expectations apply at that width.

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: ptrue

;
; DUP (integer)
;

; Don't use SVE for 64-bit vectors.
; Splats %a across <8 x i8> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single dup from w0 is expected.
define <8 x i8> @splat_v8i8(i8 %a) #0 {
; CHECK-LABEL: splat_v8i8:
; CHECK: dup v0.8b, w0
; CHECK-NEXT: ret
  %insert = insertelement <8 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}

; Don't use SVE for 128-bit vectors.
; Splats %a across <16 x i8> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single dup from w0 is expected.
define <16 x i8> @splat_v16i8(i8 %a) #0 {
; CHECK-LABEL: splat_v16i8:
; CHECK: dup v0.16b, w0
; CHECK-NEXT: ret
  %insert = insertelement <16 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}

; Splats %a across <32 x i8> and stores the result to %b. For every run line of
; 256 bits and above, one broadcast mov, one vl32 predicate and one st1b are
; expected.
define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: splat_v32i8:
; CHECK-DAG: mov [[RES:z[0-9]+]].b, w0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <32 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
  store <32 x i8> %splat, <32 x i8>* %b
  ret void
}

; Splats %a across <64 x i8> and stores the result to %b. With registers of at
; least 512 bits a single st1b is expected; with 256-bit registers the store is
; expected to be split into two st1b, the second at a register offset of 32.
define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 {
; CHECK-LABEL: splat_v64i8:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32
; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[OFFSET_HI]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <64 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
  store <64 x i8> %splat, <64 x i8>* %b
  ret void
}

; Splats %a across <128 x i8> and stores the result to %b. Only run lines with
; registers of at least 1024 bits are checked beyond the function label.
define void @splat_v128i8(i8 %a, <128 x i8>* %b) #0 {
; CHECK-LABEL: splat_v128i8:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <128 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
  store <128 x i8> %splat, <128 x i8>* %b
  ret void
}

; Splats %a across <256 x i8> and stores the result to %b. Only the 2048-bit
; run line is checked beyond the function label.
define void @splat_v256i8(i8 %a, <256 x i8>* %b) #0 {
; CHECK-LABEL: splat_v256i8:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <256 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
  store <256 x i8> %splat, <256 x i8>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; Splats %a across <4 x i16> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single dup from w0 is expected.
define <4 x i16> @splat_v4i16(i16 %a) #0 {
; CHECK-LABEL: splat_v4i16:
; CHECK: dup v0.4h, w0
; CHECK-NEXT: ret
  %insert = insertelement <4 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}

; Don't use SVE for 128-bit vectors.
; Splats %a across <8 x i16> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single dup from w0 is expected.
define <8 x i16> @splat_v8i16(i16 %a) #0 {
; CHECK-LABEL: splat_v8i16:
; CHECK: dup v0.8h, w0
; CHECK-NEXT: ret
  %insert = insertelement <8 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; Splats %a across <16 x i16> and stores the result to %b. For every run line
; of 256 bits and above, one broadcast mov, one vl16 predicate and one st1h are
; expected.
define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: splat_v16i16:
; CHECK-DAG: mov [[RES:z[0-9]+]].h, w0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <16 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
  store <16 x i16> %splat, <16 x i16>* %b
  ret void
}

; Splats %a across <32 x i16> and stores the result to %b. With registers of at
; least 512 bits a single st1h is expected; with 256-bit registers the store is
; expected to be split into two st1h, the second addressed at %b + 32 bytes.
define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: splat_v32i16:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <32 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
  store <32 x i16> %splat, <32 x i16>* %b
  ret void
}

; Splats %a across <64 x i16> and stores the result to %b. Only run lines with
; registers of at least 1024 bits are checked beyond the function label.
define void @splat_v64i16(i16 %a, <64 x i16>* %b) #0 {
; CHECK-LABEL: splat_v64i16:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <64 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
  store <64 x i16> %splat, <64 x i16>* %b
  ret void
}

; Splats %a across <128 x i16> and stores the result to %b. Only the 2048-bit
; run line is checked beyond the function label.
define void @splat_v128i16(i16 %a, <128 x i16>* %b) #0 {
; CHECK-LABEL: splat_v128i16:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <128 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
  store <128 x i16> %splat, <128 x i16>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; Splats %a across <2 x i32> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single dup from w0 is expected.
define <2 x i32> @splat_v2i32(i32 %a) #0 {
; CHECK-LABEL: splat_v2i32:
; CHECK: dup v0.2s, w0
; CHECK-NEXT: ret
  %insert = insertelement <2 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}

; Don't use SVE for 128-bit vectors.
; Splats %a across <4 x i32> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single dup from w0 is expected.
define <4 x i32> @splat_v4i32(i32 %a) #0 {
; CHECK-LABEL: splat_v4i32:
; CHECK: dup v0.4s, w0
; CHECK-NEXT: ret
  %insert = insertelement <4 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

; Splats %a across <8 x i32> and stores the result to %b. For every run line of
; 256 bits and above, one broadcast mov, one vl8 predicate and one st1w are
; expected.
define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: splat_v8i32:
; CHECK-DAG: mov [[RES:z[0-9]+]].s, w0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <8 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
  store <8 x i32> %splat, <8 x i32>* %b
  ret void
}

; Splats %a across <16 x i32> and stores the result to %b. With registers of at
; least 512 bits a single st1w is expected; with 256-bit registers the store is
; expected to be split into two st1w, the second addressed at %b + 32 bytes.
define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: splat_v16i32:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <16 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %splat, <16 x i32>* %b
  ret void
}

; Splats %a across <32 x i32> and stores the result to %b. Only run lines with
; registers of at least 1024 bits are checked beyond the function label.
define void @splat_v32i32(i32 %a, <32 x i32>* %b) #0 {
; CHECK-LABEL: splat_v32i32:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <32 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
  store <32 x i32> %splat, <32 x i32>* %b
  ret void
}

; Splats %a across <64 x i32> and stores the result to %b. Only the 2048-bit
; run line is checked beyond the function label.
define void @splat_v64i32(i32 %a, <64 x i32>* %b) #0 {
; CHECK-LABEL: splat_v64i32:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <64 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
  store <64 x i32> %splat, <64 x i32>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; Builds a <1 x i64> from %a and returns it. With a single lane there is
; nothing to broadcast; the expected code is a plain fmov from x0 into d0.
define <1 x i64> @splat_v1i64(i64 %a) #0 {
; CHECK-LABEL: splat_v1i64:
; CHECK: fmov d0, x0
; CHECK-NEXT: ret
  %insert = insertelement <1 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
  ret <1 x i64> %splat
}

; Don't use SVE for 128-bit vectors.
; Splats %a across <2 x i64> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single dup from x0 is expected.
define <2 x i64> @splat_v2i64(i64 %a) #0 {
; CHECK-LABEL: splat_v2i64:
; CHECK: dup v0.2d, x0
; CHECK-NEXT: ret
  %insert = insertelement <2 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

; Splats %a across <4 x i64> and stores the result to %b. For every run line of
; 256 bits and above, one broadcast mov, one vl4 predicate and one st1d are
; expected.
define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: splat_v4i64:
; CHECK-DAG: mov [[RES:z[0-9]+]].d, x0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <4 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %splat, <4 x i64>* %b
  ret void
}

; Splats %a across <8 x i64> and stores the result to %b. With registers of at
; least 512 bits a single st1d is expected; with 256-bit registers the store is
; expected to be split into two st1d, the second addressed at %b + 32 bytes.
define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: splat_v8i64:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <8 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
  store <8 x i64> %splat, <8 x i64>* %b
  ret void
}

; Splats %a across <16 x i64> and stores the result to %b. Only run lines with
; registers of at least 1024 bits are checked beyond the function label.
define void @splat_v16i64(i64 %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: splat_v16i64:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <16 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
  store <16 x i64> %splat, <16 x i64>* %b
  ret void
}

; Splats %a across <32 x i64> and stores the result to %b. Only the 2048-bit
; run line is checked beyond the function label.
define void @splat_v32i64(i64 %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: splat_v32i64:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <32 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
  store <32 x i64> %splat, <32 x i64>* %b
  ret void
}

;
; DUP (floating-point)
;

; Don't use SVE for 64-bit vectors.
; Splats %a across <4 x half> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single lane-0 dup within v0 is expected.
define <4 x half> @splat_v4f16(half %a) #0 {
; CHECK-LABEL: splat_v4f16:
; CHECK: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
  %insert = insertelement <4 x half> undef, half %a, i64 0
  %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}

; Don't use SVE for 128-bit vectors.
; Splats %a across <8 x half> (insertelement into lane 0, then a zero-mask
; shufflevector) and returns it; a single lane-0 dup within v0 is expected.
define <8 x half> @splat_v8f16(half %a) #0 {
; CHECK-LABEL: splat_v8f16:
; CHECK: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
  %insert = insertelement <8 x half> undef, half %a, i64 0
  %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}

; Splats %a across <16 x half> and stores the result to %b. For every run line
; of 256 bits and above, one broadcast mov from h0, one vl16 predicate and one
; st1h through x0 are expected.
define void @splat_v16f16(half %a, <16 x half>* %b) #0 {
; CHECK-LABEL: splat_v16f16:
; CHECK-DAG: mov [[RES:z[0-9]+]].h, h0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %insert = insertelement <16 x half> undef, half %a, i64 0
  %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
  store <16 x half> %splat, <16 x half>* %b
  ret void
}

; Splats %a across <32 x half> and stores the result to %b. With registers of
; at least 512 bits a single st1h is expected; with 256-bit registers the store
; is expected to be split into two st1h, the second addressed at %b + 32 bytes.
define void @splat_v32f16(half %a, <32 x half>* %b) #0 {
; CHECK-LABEL: splat_v32f16:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <32 x half> undef, half %a, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
  store <32 x half> %splat, <32 x half>* %b
  ret void
}

; Splats %a across <64 x half> and stores the result to %b. Only run lines with
; registers of at least 1024 bits are checked beyond the function label.
define void @splat_v64f16(half %a, <64 x half>* %b) #0 {
; CHECK-LABEL: splat_v64f16:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <64 x half> undef, half %a, i64 0
  %splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
  store <64 x half> %splat, <64 x half>* %b
  ret void
}

; Splats %a across <128 x half> and stores the result to %b. Only the 2048-bit
; run line is checked beyond the function label.
define void @splat_v128f16(half %a, <128 x half>* %b) #0 {
; CHECK-LABEL: splat_v128f16:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <128 x half> undef, half %a, i64 0
  %splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
  store <128 x half> %splat, <128 x half>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; Splats %a across <2 x float> and returns it; a single lane-0 dup within v0 is
; expected. The %op2 parameter is not referenced by the body.
define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
; CHECK-LABEL: splat_v2f32:
; CHECK: dup v0.2s, v0.s[0]
; CHECK-NEXT: ret
  %insert = insertelement <2 x float> undef, float %a, i64 0
  %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}

; Don't use SVE for 128-bit vectors.
; Splats %a across <4 x float> and returns it; a single lane-0 dup within v0 is
; expected. The %op2 parameter is not referenced by the body.
define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %insert = insertelement <4 x float> undef, float %a, i64 0
  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}

; Splats %a across <8 x float> and stores the result to %b. For every run line
; of 256 bits and above, one broadcast mov from s0, one vl8 predicate and one
; st1w through x0 are expected.
define void @splat_v8f32(float %a, <8 x float>* %b) #0 {
; CHECK-LABEL: splat_v8f32:
; CHECK-DAG: mov [[RES:z[0-9]+]].s, s0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %insert = insertelement <8 x float> undef, float %a, i64 0
  %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
  store <8 x float> %splat, <8 x float>* %b
  ret void
}

; Splats %a across <16 x float> and stores the result to %b. With registers of
; at least 512 bits a single st1w is expected; with 256-bit registers the store
; is expected to be split into two st1w, the second addressed at %b + 32 bytes.
define void @splat_v16f32(float %a, <16 x float>* %b) #0 {
; CHECK-LABEL: splat_v16f32:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <16 x float> undef, float %a, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
  store <16 x float> %splat, <16 x float>* %b
  ret void
}

; Splats %a across <32 x float> and stores the result to %b. Only run lines
; with registers of at least 1024 bits are checked beyond the function label.
define void @splat_v32f32(float %a, <32 x float>* %b) #0 {
; CHECK-LABEL: splat_v32f32:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <32 x float> undef, float %a, i64 0
  %splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
  store <32 x float> %splat, <32 x float>* %b
  ret void
}

; Splats %a across <64 x float> and stores the result to %b. Only the 2048-bit
; run line is checked beyond the function label.
define void @splat_v64f32(float %a, <64 x float>* %b) #0 {
; CHECK-LABEL: splat_v64f32:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <64 x float> undef, float %a, i64 0
  %splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
  store <64 x float> %splat, <64 x float>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; Builds a <1 x double> from %a and returns it. The checks expect the entry
; block label to be followed directly by ret, i.e. no instruction is needed.
; The %op2 parameter is not referenced by the body.
define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 {
; CHECK-LABEL: splat_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %insert = insertelement <1 x double> undef, double %a, i64 0
  %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %splat
}

; Don't use SVE for 128-bit vectors.
; Splats %a across <2 x double> and returns it; a single lane-0 dup within v0
; is expected. The %op2 parameter is not referenced by the body.
define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK: dup v0.2d, v0.d[0]
; CHECK-NEXT: ret
  %insert = insertelement <2 x double> undef, double %a, i64 0
  %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}

; Splats %a across <4 x double> and stores the result to %b. For every run line
; of 256 bits and above, one broadcast mov from d0, one vl4 predicate and one
; st1d through x0 are expected.
define void @splat_v4f64(double %a, <4 x double>* %b) #0 {
; CHECK-LABEL: splat_v4f64:
; CHECK-DAG: mov [[RES:z[0-9]+]].d, d0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %insert = insertelement <4 x double> undef, double %a, i64 0
  %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
  store <4 x double> %splat, <4 x double>* %b
  ret void
}

; Splats %a across <8 x double> and stores the result to %b. With registers of
; at least 512 bits a single st1d is expected; with 256-bit registers the store
; is expected to be split into two st1d, the second addressed at %b + 32 bytes.
define void @splat_v8f64(double %a, <8 x double>* %b) #0 {
; CHECK-LABEL: splat_v8f64:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <8 x double> undef, double %a, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
  store <8 x double> %splat, <8 x double>* %b
  ret void
}

; Splats %a across <16 x double> and stores the result to %b. Only run lines
; with registers of at least 1024 bits are checked beyond the function label.
define void @splat_v16f64(double %a, <16 x double>* %b) #0 {
; CHECK-LABEL: splat_v16f64:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <16 x double> undef, double %a, i64 0
  %splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
  store <16 x double> %splat, <16 x double>* %b
  ret void
}

; Splats %a across <32 x double> and stores the result to %b. Only the 2048-bit
; run line is checked beyond the function label.
define void @splat_v32f64(double %a, <32 x double>* %b) #0 {
; CHECK-LABEL: splat_v32f64:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <32 x double> undef, double %a, i64 0
  %splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
  store <32 x double> %splat, <32 x double>* %b
  ret void
}

;
; DUP (integer immediate)
;

; Splats the constant 1 across <64 x i8> and stores it to %a. Run lines with
; registers of at least 512 bits expect an immediate-form mov of #1; narrower
; run lines are only checked for the function label.
define void @splat_imm_v64i8(<64 x i8>* %a) #0 {
; CHECK-LABEL: splat_imm_v64i8:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, #1
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <64 x i8> undef, i8 1, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
  store <64 x i8> %splat, <64 x i8>* %a
  ret void
}

; Splats the constant 2 across <32 x i16> and stores it to %a. Run lines with
; registers of at least 512 bits expect an immediate-form mov of #2; narrower
; run lines are only checked for the function label.
define void @splat_imm_v32i16(<32 x i16>* %a) #0 {
; CHECK-LABEL: splat_imm_v32i16:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, #2
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <32 x i16> undef, i16 2, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
  store <32 x i16> %splat, <32 x i16>* %a
  ret void
}

; Splats the constant 3 across <16 x i32> and stores it to %a. Run lines with
; registers of at least 512 bits expect an immediate-form mov of #3; narrower
; run lines are only checked for the function label.
define void @splat_imm_v16i32(<16 x i32>* %a) #0 {
; CHECK-LABEL: splat_imm_v16i32:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, #3
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <16 x i32> undef, i32 3, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %splat, <16 x i32>* %a
  ret void
}

; Splats the constant 4 across <8 x i64> and stores it to %a. Run lines with
; registers of at least 512 bits expect an immediate-form mov of #4; narrower
; run lines are only checked for the function label.
define void @splat_imm_v8i64(<8 x i64>* %a) #0 {
; CHECK-LABEL: splat_imm_v8i64:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, #4
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <8 x i64> undef, i64 4, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
  store <8 x i64> %splat, <8 x i64>* %a
  ret void
}

;
; DUP (floating-point immediate)
;

; Splats the constant 5.0 across <32 x half> and stores it to %a. Run lines
; with registers of at least 512 bits expect an immediate-form fmov; narrower
; run lines are only checked for the function label.
define void @splat_imm_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: splat_imm_v32f16:
; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].h, #5.00000000
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <32 x half> undef, half 5.0, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
  store <32 x half> %splat, <32 x half>* %a
  ret void
}

; Splats the constant 6.0 across <16 x float> and stores it to %a. Run lines
; with registers of at least 512 bits expect an immediate-form fmov; narrower
; run lines are only checked for the function label.
define void @splat_imm_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: splat_imm_v16f32:
; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].s, #6.00000000
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <16 x float> undef, float 6.0, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
  store <16 x float> %splat, <16 x float>* %a
  ret void
}

; Splats the constant 7.0 across <8 x double> and stores it to %a. Run lines
; with registers of at least 512 bits expect an immediate-form fmov; narrower
; run lines are only checked for the function label.
define void @splat_imm_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: splat_imm_v8f64:
; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].d, #7.00000000
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <8 x double> undef, double 7.0, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
  store <8 x double> %splat, <8 x double>* %a
  ret void
}
; Attribute group #0, referenced by the function definitions in this file,
; enables the SVE target feature.
attributes #0 = { "target-features"="+sve" }