• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
4
; Splat of a scalar i32: %src is inserted into lane 0 and a shufflevector with
; a zeroinitializer mask copies lane 0 into all four lanes. The checks expect
; a single vdup.32 from r0.
5define arm_aapcs_vfpcc <4 x i32> @vdup_i32(i32 %src) {
6; CHECK-LABEL: vdup_i32:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    vdup.32 q0, r0
9; CHECK-NEXT:    bx lr
10entry:
11  %0 = insertelement <4 x i32> undef, i32 %src, i32 0
12  %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
13  ret <4 x i32> %out
14}
15
; Splat of a scalar i16 across all eight lanes (insert into lane 0, then a
; shufflevector with a zeroinitializer mask). The checks expect a single
; vdup.16 from r0.
16define arm_aapcs_vfpcc <8 x i16> @vdup_i16(i16 %src) {
17; CHECK-LABEL: vdup_i16:
18; CHECK:       @ %bb.0: @ %entry
19; CHECK-NEXT:    vdup.16 q0, r0
20; CHECK-NEXT:    bx lr
21entry:
22  %0 = insertelement <8 x i16> undef, i16 %src, i32 0
23  %out = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
24  ret <8 x i16> %out
25}
26
; Splat of a scalar i8 across all sixteen lanes (insert into lane 0, then a
; shufflevector with a zeroinitializer mask). The checks expect a single
; vdup.8 from r0.
27define arm_aapcs_vfpcc <16 x i8> @vdup_i8(i8 %src) {
28; CHECK-LABEL: vdup_i8:
29; CHECK:       @ %bb.0: @ %entry
30; CHECK-NEXT:    vdup.8 q0, r0
31; CHECK-NEXT:    bx lr
32entry:
33  %0 = insertelement <16 x i8> undef, i8 %src, i32 0
34  %out = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
35  ret <16 x i8> %out
36}
37
; Splat of a scalar i64 across both lanes of a <2 x i64>. The checks do not
; expect a vdup here: the expected output is four vmov.32 lane writes that
; place r0 in q0[0]/q0[2] and r1 in q0[1]/q0[3].
38define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
39; CHECK-LABEL: vdup_i64:
40; CHECK:       @ %bb.0: @ %entry
41; CHECK-NEXT:    vmov.32 q0[0], r0
42; CHECK-NEXT:    vmov.32 q0[1], r1
43; CHECK-NEXT:    vmov.32 q0[2], r0
44; CHECK-NEXT:    vmov.32 q0[3], r1
45; CHECK-NEXT:    bx lr
46entry:
47  %0 = insertelement <2 x i64> undef, i64 %src, i32 0
48  %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
49  ret <2 x i64> %out
50}
51
; Splat of a float argument across all four lanes of a <4 x float>. The checks
; expect the value to be moved from s0 into r0 and then broadcast with
; vdup.32.
52define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
53; CHECK-LABEL: vdup_f32_1:
54; CHECK:       @ %bb.0: @ %entry
55; CHECK-NEXT:    vmov r0, s0
56; CHECK-NEXT:    vdup.32 q0, r0
57; CHECK-NEXT:    bx lr
58entry:
59  %0 = insertelement <4 x float> undef, float %src, i32 0
60  %out = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
61  ret <4 x float> %out
62}
63
; Splat of a computed float (the fadd of the two arguments) across all four
; lanes. The checks expect vadd.f32 into s0, a vmov of s0 into r0, and then
; vdup.32 from r0.
64define arm_aapcs_vfpcc <4 x float> @vdup_f32_2(float %src1, float %src2) {
65; CHECK-LABEL: vdup_f32_2:
66; CHECK:       @ %bb.0: @ %entry
67; CHECK-NEXT:    vadd.f32 s0, s0, s1
68; CHECK-NEXT:    vmov r0, s0
69; CHECK-NEXT:    vdup.32 q0, r0
70; CHECK-NEXT:    bx lr
71entry:
72  %0 = fadd float %src1, %src2
73  %1 = insertelement <4 x float> undef, float %0, i32 0
74  %out = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
75  ret <4 x float> %out
76}
77
; Bitcast variant of vdup_f32_1: the float argument is bitcast to i32, splatted
; as a <4 x i32>, and the vector is bitcast back to <4 x float>. The expected
; instructions are the same vmov r0, s0 + vdup.32 pair as in vdup_f32_1.
78define arm_aapcs_vfpcc <4 x float> @vdup_f32_1bc(float %src) {
79; CHECK-LABEL: vdup_f32_1bc:
80; CHECK:       @ %bb.0: @ %entry
81; CHECK-NEXT:    vmov r0, s0
82; CHECK-NEXT:    vdup.32 q0, r0
83; CHECK-NEXT:    bx lr
84entry:
85  %srcbc = bitcast float %src to i32
86  %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
87  %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
88  %outbc = bitcast <4 x i32> %out to <4 x float>
89  ret <4 x float> %outbc
90}
91
; Bitcast variant of vdup_f32_2: the fadd result is bitcast to i32, splatted as
; a <4 x i32>, and the vector is bitcast back to <4 x float>. The expected
; instructions are the same vadd.f32 / vmov / vdup.32 sequence as in
; vdup_f32_2.
92define arm_aapcs_vfpcc <4 x float> @vdup_f32_2bc(float %src1, float %src2) {
93; CHECK-LABEL: vdup_f32_2bc:
94; CHECK:       @ %bb.0: @ %entry
95; CHECK-NEXT:    vadd.f32 s0, s0, s1
96; CHECK-NEXT:    vmov r0, s0
97; CHECK-NEXT:    vdup.32 q0, r0
98; CHECK-NEXT:    bx lr
99entry:
100  %0 = fadd float %src1, %src2
101  %bc = bitcast float %0 to i32
102  %1 = insertelement <4 x i32> undef, i32 %bc, i32 0
103  %out = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
104  %outbc = bitcast <4 x i32> %out to <4 x float>
105  ret <4 x float> %outbc
106}
107
108; TODO: Calling convention needs fixing to pass half types directly to functions
; Splat of a computed half across all eight lanes of an <8 x half>. The two
; half operands are loaded through the pointer arguments and added; the checks
; expect two vldr.16 loads, a vadd.f16, a vmov.f16 of the sum into r0, and a
; vdup.16 from r0.
109define arm_aapcs_vfpcc <8 x half> @vdup_f16(half* %src1, half* %src2) {
110; CHECK-LABEL: vdup_f16:
111; CHECK:       @ %bb.0: @ %entry
112; CHECK-NEXT:    vldr.16 s0, [r1]
113; CHECK-NEXT:    vldr.16 s2, [r0]
114; CHECK-NEXT:    vadd.f16 s0, s2, s0
115; CHECK-NEXT:    vmov.f16 r0, s0
116; CHECK-NEXT:    vdup.16 q0, r0
117; CHECK-NEXT:    bx lr
118entry:
119  %0 = load half, half *%src1, align 2
120  %1 = load half, half *%src2, align 2
121  %2 = fadd half %0, %1
122  %3 = insertelement <8 x half> undef, half %2, i32 0
123  %out = shufflevector <8 x half> %3, <8 x half> undef, <8 x i32> zeroinitializer
124  ret <8 x half> %out
125}
126
; Bitcast variant of vdup_f16: the half sum is bitcast to i16, splatted as an
; <8 x i16>, and the vector is bitcast back to <8 x half>. The expected
; instructions are the same load / vadd.f16 / vmov.f16 / vdup.16 sequence as
; in vdup_f16.
127define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half* %src1, half* %src2) {
128; CHECK-LABEL: vdup_f16_bc:
129; CHECK:       @ %bb.0: @ %entry
130; CHECK-NEXT:    vldr.16 s0, [r1]
131; CHECK-NEXT:    vldr.16 s2, [r0]
132; CHECK-NEXT:    vadd.f16 s0, s2, s0
133; CHECK-NEXT:    vmov.f16 r0, s0
134; CHECK-NEXT:    vdup.16 q0, r0
135; CHECK-NEXT:    bx lr
136entry:
137  %0 = load half, half *%src1, align 2
138  %1 = load half, half *%src2, align 2
139  %2 = fadd half %0, %1
140  %bc = bitcast half %2 to i16
141  %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
142  %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
143  %outbc = bitcast <8 x i16> %out to <8 x half>
144  ret <8 x half> %outbc
145}
146
; Splat of a double argument across both lanes of a <2 x double>. The checks
; do not expect a vdup: the expected output is two vmov.f32 copies (s0 -> s2,
; s1 -> s3), preceded by a kill annotation on d0/q0.
147define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
148; CHECK-LABEL: vdup_f64:
149; CHECK:       @ %bb.0: @ %entry
150; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
151; CHECK-NEXT:    vmov.f32 s2, s0
152; CHECK-NEXT:    vmov.f32 s3, s1
153; CHECK-NEXT:    bx lr
154entry:
155  %0 = insertelement <2 x double> undef, double %src, i32 0
156  %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
157  ret <2 x double> %out
158}
159
160
161
; Broadcast of lane 3 of a <4 x i32> to every lane (shufflevector mask of all
; 3s). The checks expect the lane to be read from s3 into r0 and then
; duplicated with vdup.32.
162define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
163; CHECK-LABEL: vduplane_i32:
164; CHECK:       @ %bb.0: @ %entry
165; CHECK-NEXT:    vmov r0, s3
166; CHECK-NEXT:    vdup.32 q0, r0
167; CHECK-NEXT:    bx lr
168entry:
169  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
170  ret <4 x i32> %out
171}
172
; Broadcast of lane 3 of an <8 x i16> to every lane (shufflevector mask of all
; 3s). The checks expect vmov.u16 to extract q0[3] into r0, followed by
; vdup.16.
173define arm_aapcs_vfpcc <8 x i16> @vduplane_i16(<8 x i16> %src) {
174; CHECK-LABEL: vduplane_i16:
175; CHECK:       @ %bb.0: @ %entry
176; CHECK-NEXT:    vmov.u16 r0, q0[3]
177; CHECK-NEXT:    vdup.16 q0, r0
178; CHECK-NEXT:    bx lr
179entry:
180  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
181  ret <8 x i16> %out
182}
183
; Broadcast of lane 3 of a <16 x i8> to every lane (shufflevector mask of all
; 3s). The checks expect vmov.u8 to extract q0[3] into r0, followed by vdup.8.
184define arm_aapcs_vfpcc <16 x i8> @vduplane_i8(<16 x i8> %src) {
185; CHECK-LABEL: vduplane_i8:
186; CHECK:       @ %bb.0: @ %entry
187; CHECK-NEXT:    vmov.u8 r0, q0[3]
188; CHECK-NEXT:    vdup.8 q0, r0
189; CHECK-NEXT:    bx lr
190entry:
191  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
192  ret <16 x i8> %out
193}
194
; Broadcast of lane 1 of a <2 x i64> to both lanes (shufflevector mask
; <1, 1>). The checks expect two vmov.f32 copies (s2 -> s0, s3 -> s1) rather
; than a vdup.
195define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
196; CHECK-LABEL: vduplane_i64:
197; CHECK:       @ %bb.0: @ %entry
198; CHECK-NEXT:    vmov.f32 s0, s2
199; CHECK-NEXT:    vmov.f32 s1, s3
200; CHECK-NEXT:    bx lr
201entry:
202  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
203  ret <2 x i64> %out
204}
205
; Broadcast of lane 3 of a <4 x float> to every lane (shufflevector mask of
; all 3s). The checks expect the same vmov r0, s3 + vdup.32 pair as the
; integer vduplane_i32 test.
206define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
207; CHECK-LABEL: vduplane_f32:
208; CHECK:       @ %bb.0: @ %entry
209; CHECK-NEXT:    vmov r0, s3
210; CHECK-NEXT:    vdup.32 q0, r0
211; CHECK-NEXT:    bx lr
212entry:
213  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
214  ret <4 x float> %out
215}
216
; Broadcast of lane 3 of an <8 x half> to every lane (shufflevector mask of
; all 3s). The checks expect the same vmov.u16 r0, q0[3] + vdup.16 pair as the
; integer vduplane_i16 test.
217define arm_aapcs_vfpcc <8 x half> @vduplane_f16(<8 x half> %src) {
218; CHECK-LABEL: vduplane_f16:
219; CHECK:       @ %bb.0: @ %entry
220; CHECK-NEXT:    vmov.u16 r0, q0[3]
221; CHECK-NEXT:    vdup.16 q0, r0
222; CHECK-NEXT:    bx lr
223entry:
224  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
225  ret <8 x half> %out
226}
227
; Broadcast of lane 1 of a <2 x double> to both lanes (shufflevector mask
; <1, 1>). The checks expect two vmov.f32 copies (s2 -> s0, s3 -> s1), the
; same output as vduplane_i64.
228define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
229; CHECK-LABEL: vduplane_f64:
230; CHECK:       @ %bb.0: @ %entry
231; CHECK-NEXT:    vmov.f32 s0, s2
232; CHECK-NEXT:    vmov.f32 s1, s3
233; CHECK-NEXT:    bx lr
234entry:
235  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
236  ret <2 x double> %out
237}
238
239
; Extract from a splat: the float argument is bitcast to i32, splatted as a
; <4 x i32>, bitcast back to <4 x float>, and lane 2 is extracted and
; returned. The checks expect only the return (bx lr), i.e. no vmov/vdup is
; emitted for the splat-then-extract sequence.
240define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
241; CHECK-LABEL: vdup_f32_extract:
242; CHECK:       @ %bb.0: @ %entry
243; CHECK-NEXT:    bx lr
244entry:
245  %srcbc = bitcast float %src to i32
246  %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
247  %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
248  %outbc = bitcast <4 x i32> %out to <4 x float>
249  %ext = extractelement <4 x float> %outbc, i32 2
250  ret float %ext
251}
252
; Extract from a splat, half version: two halves are loaded through the
; pointer arguments and added, the sum is bitcast to i16 and splatted as an
; <8 x i16>, the vector is bitcast back to <8 x half>, and lane 2 is extracted
; and returned. The checks expect only the two vldr.16 loads, the vadd.f16
; and the return, i.e. no vmov/vdup is emitted for the splat-then-extract
; sequence.
253define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) {
254; CHECK-LABEL: vdup_f16_extract:
255; CHECK:       @ %bb.0: @ %entry
256; CHECK-NEXT:    vldr.16 s0, [r1]
257; CHECK-NEXT:    vldr.16 s2, [r0]
258; CHECK-NEXT:    vadd.f16 s0, s2, s0
259; CHECK-NEXT:    bx lr
260entry:
261  %0 = load half, half *%src1, align 2
262  %1 = load half, half *%src2, align 2
263  %2 = fadd half %0, %1
264  %bc = bitcast half %2 to i16
265  %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
266  %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
267  %outbc = bitcast <8 x i16> %out to <8 x half>
268  %ext = extractelement <8 x half> %outbc, i32 2
269  ret half %ext
270}
271