; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s
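; Checks lowering of de-interleaving load patterns (a wide load followed by
; even/odd shufflevectors and an add/fadd) to MVE vld20/vld21 instruction
; pairs for full-width vectors, with vrev/scalar fallbacks for narrower types.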

; i32

define void @vld2_v2i32(<4 x i32> *%src, <2 x i32> *%dst) {
; CHECK-LABEL: vld2_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    add r2, r3
; CHECK-NEXT:    strd r2, r0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x i32>, <4 x i32>* %src, align 4
  %s1 = shufflevector <4 x i32> %l1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %s2 = shufflevector <4 x i32> %l1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %a = add <2 x i32> %s1, %s2
  store <2 x i32> %a, <2 x i32> *%dst
  ret void
}

define void @vld2_v4i32(<8 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: vld2_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x i32>, <8 x i32>* %src, align 4
  %s1 = shufflevector <8 x i32> %l1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %s2 = shufflevector <8 x i32> %l1, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %a = add <4 x i32> %s1, %s2
  store <4 x i32> %a, <4 x i32> *%dst
  ret void
}

define void @vld2_v8i32(<16 x i32> *%src, <8 x i32> *%dst) {
; CHECK-LABEL: vld2_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]!
; CHECK-NEXT:    vld20.32 {q2, q3}, [r0]
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vld21.32 {q2, q3}, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q1, q2, q3
; CHECK-NEXT:    vstrw.32 q1, [r1, #16]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <16 x i32>, <16 x i32>* %src, align 4
  %s1 = shufflevector <16 x i32> %l1, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %s2 = shufflevector <16 x i32> %l1, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %a = add <8 x i32> %s1, %s2
  store <8 x i32> %a, <8 x i32> *%dst
  ret void
}

define void @vld2_v16i32(<32 x i32> *%src, <16 x i32> *%dst) {
; CHECK-LABEL: vld2_v16i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    add.w r2, r0, #96
; CHECK-NEXT:    add.w r3, r0, #64
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]!
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vld20.32 {q1, q2}, [r3]
; CHECK-NEXT:    vld20.32 {q3, q4}, [r2]
; CHECK-NEXT:    vld20.32 {q5, q6}, [r0]
; CHECK-NEXT:    vld21.32 {q5, q6}, [r0]
; CHECK-NEXT:    vld21.32 {q1, q2}, [r3]
; CHECK-NEXT:    vld21.32 {q3, q4}, [r2]
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q1_q2
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q5, q5, q6
; CHECK-NEXT:    vadd.i32 q1, q1, q2
; CHECK-NEXT:    vadd.i32 q3, q3, q4
; CHECK-NEXT:    vstrw.32 q1, [r1, #32]
; CHECK-NEXT:    vstrw.32 q3, [r1, #48]
; CHECK-NEXT:    vstrw.32 q5, [r1, #16]
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <32 x i32>, <32 x i32>* %src, align 4
  %s1 = shufflevector <32 x i32> %l1, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %s2 = shufflevector <32 x i32> %l1, <32 x i32> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %a = add <16 x i32> %s1, %s2
  store <16 x i32> %a, <16 x i32> *%dst
  ret void
}

; i16

define void @vld2_v2i16(<4 x i16> *%src, <2 x i16> *%dst) {
; CHECK-LABEL: vld2_v2i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    strh r0, [r1, #2]
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    strh r0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x i16>, <4 x i16>* %src, align 4
  %s1 = shufflevector <4 x i16> %l1, <4 x i16> undef, <2 x i32> <i32 0, i32 2>
  %s2 = shufflevector <4 x i16> %l1, <4 x i16> undef, <2 x i32> <i32 1, i32 3>
  %a = add <2 x i16> %s1, %s2
  store <2 x i16> %a, <2 x i16> *%dst
  ret void
}

define void @vld2_v4i16(<8 x i16> *%src, <4 x i16> *%dst) {
; CHECK-LABEL: vld2_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x i16>, <8 x i16>* %src, align 4
  %s1 = shufflevector <8 x i16> %l1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %s2 = shufflevector <8 x i16> %l1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %a = add <4 x i16> %s1, %s2
  store <4 x i16> %a, <4 x i16> *%dst
  ret void
}

define void @vld2_v8i16(<16 x i16> *%src, <8 x i16> *%dst) {
; CHECK-LABEL: vld2_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.16 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.16 {q0, q1}, [r0]
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <16 x i16>, <16 x i16>* %src, align 4
  %s1 = shufflevector <16 x i16> %l1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %s2 = shufflevector <16 x i16> %l1, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %a = add <8 x i16> %s1, %s2
  store <8 x i16> %a, <8 x i16> *%dst
  ret void
}

define void @vld2_v16i16(<32 x i16> *%src, <16 x i16> *%dst) {
; CHECK-LABEL: vld2_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.16 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.16 {q0, q1}, [r0]!
; CHECK-NEXT:    vld20.16 {q2, q3}, [r0]
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vld21.16 {q2, q3}, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vadd.i16 q1, q2, q3
; CHECK-NEXT:    vstrw.32 q1, [r1, #16]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <32 x i16>, <32 x i16>* %src, align 4
  %s1 = shufflevector <32 x i16> %l1, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %s2 = shufflevector <32 x i16> %l1, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %a = add <16 x i16> %s1, %s2
  store <16 x i16> %a, <16 x i16> *%dst
  ret void
}

; i8

define void @vld2_v2i8(<4 x i8> *%src, <2 x i8> *%dst) {
; CHECK-LABEL: vld2_v2i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0]
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    strb r0, [r1, #1]
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    strb r0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x i8>, <4 x i8>* %src, align 4
  %s1 = shufflevector <4 x i8> %l1, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
  %s2 = shufflevector <4 x i8> %l1, <4 x i8> undef, <2 x i32> <i32 1, i32 3>
  %a = add <2 x i8> %s1, %s2
  store <2 x i8> %a, <2 x i8> *%dst
  ret void
}

define void @vld2_v4i8(<8 x i8> *%src, <4 x i8> *%dst) {
; CHECK-LABEL: vld2_v4i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vstrb.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x i8>, <8 x i8>* %src, align 4
  %s1 = shufflevector <8 x i8> %l1, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %s2 = shufflevector <8 x i8> %l1, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %a = add <4 x i8> %s1, %s2
  store <4 x i8> %a, <4 x i8> *%dst
  ret void
}

define void @vld2_v8i8(<16 x i8> *%src, <8 x i8> *%dst) {
; CHECK-LABEL: vld2_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vstrb.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <16 x i8>, <16 x i8>* %src, align 4
  %s1 = shufflevector <16 x i8> %l1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %s2 = shufflevector <16 x i8> %l1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %a = add <8 x i8> %s1, %s2
  store <8 x i8> %a, <8 x i8> *%dst
  ret void
}

define void @vld2_v16i8(<32 x i8> *%src, <16 x i8> *%dst) {
; CHECK-LABEL: vld2_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.8 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.8 {q0, q1}, [r0]
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <32 x i8>, <32 x i8>* %src, align 4
  %s1 = shufflevector <32 x i8> %l1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %s2 = shufflevector <32 x i8> %l1, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %a = add <16 x i8> %s1, %s2
  store <16 x i8> %a, <16 x i8> *%dst
  ret void
}

; i64

define void @vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
; CHECK-LABEL: vld2_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
; CHECK-NEXT:    vmov.f64 d2, d1
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s10
; CHECK-NEXT:    vmov.f32 s2, s8
; CHECK-NEXT:    vmov.f32 s3, s9
; CHECK-NEXT:    vmov.f32 s7, s11
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r4, s0
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r12, s7
; CHECK-NEXT:    adds.w lr, r0, r3
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r3, s5
; CHECK-NEXT:    adc.w r12, r12, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    adds r0, r0, r4
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    adcs r2, r3
; CHECK-NEXT:    vmov.32 q0[1], r2
; CHECK-NEXT:    vmov.32 q0[2], lr
; CHECK-NEXT:    vmov.32 q0[3], r12
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    pop {r4, pc}
entry:
  %l1 = load <4 x i64>, <4 x i64>* %src, align 4
  %s1 = shufflevector <4 x i64> %l1, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
  %s2 = shufflevector <4 x i64> %l1, <4 x i64> undef, <2 x i32> <i32 1, i32 3>
  %a = add <2 x i64> %s1, %s2
  store <2 x i64> %a, <2 x i64> *%dst
  ret void
}

define void @vld2_v4i64(<8 x i64> *%src, <4 x i64> *%dst) {
; CHECK-LABEL: vld2_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vldrw.u32 q3, [r0, #32]
; CHECK-NEXT:    vldrw.u32 q5, [r0, #48]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
; CHECK-NEXT:    vmov.f64 d8, d7
; CHECK-NEXT:    vmov.f32 s17, s15
; CHECK-NEXT:    vmov.f32 s18, s22
; CHECK-NEXT:    vmov.f32 s14, s20
; CHECK-NEXT:    vmov.f32 s15, s21
; CHECK-NEXT:    vmov.f32 s19, s23
; CHECK-NEXT:    vmov r3, s18
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    vmov r4, s12
; CHECK-NEXT:    vmov.f64 d2, d1
; CHECK-NEXT:    vmov r12, s19
; CHECK-NEXT:    vmov r2, s15
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s10
; CHECK-NEXT:    vmov.f32 s2, s8
; CHECK-NEXT:    vmov.f32 s7, s11
; CHECK-NEXT:    vmov.f32 s3, s9
; CHECK-NEXT:    adds.w lr, r0, r3
; CHECK-NEXT:    vmov r0, s16
; CHECK-NEXT:    vmov r3, s17
; CHECK-NEXT:    adc.w r12, r12, r2
; CHECK-NEXT:    vmov r2, s13
; CHECK-NEXT:    adds r0, r0, r4
; CHECK-NEXT:    vmov r4, s2
; CHECK-NEXT:    vmov.32 q3[0], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    adcs r2, r3
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    vmov.32 q3[1], r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov.32 q3[2], lr
; CHECK-NEXT:    vmov.32 q3[3], r12
; CHECK-NEXT:    vstrw.32 q3, [r1, #16]
; CHECK-NEXT:    adds.w lr, r4, r3
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    adc.w r12, r2, r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r2, s5
; CHECK-NEXT:    vmov r4, s1
; CHECK-NEXT:    adds r0, r0, r3
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    adcs r2, r4
; CHECK-NEXT:    vmov.32 q0[1], r2
; CHECK-NEXT:    vmov.32 q0[2], lr
; CHECK-NEXT:    vmov.32 q0[3], r12
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r4, pc}
entry:
  %l1 = load <8 x i64>, <8 x i64>* %src, align 4
  %s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %s2 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %a = add <4 x i64> %s1, %s2
  store <4 x i64> %a, <4 x i64> *%dst
  ret void
}

; f32

define void @vld2_v2f32(<4 x float> *%src, <2 x float> *%dst) {
; CHECK-LABEL: vld2_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov.f32 s4, s1
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s1, s2
; CHECK-NEXT:    vadd.f32 q0, q0, q1
; CHECK-NEXT:    vstmia r1, {s0, s1}
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x float>, <4 x float>* %src, align 4
  %s1 = shufflevector <4 x float> %l1, <4 x float> undef, <2 x i32> <i32 0, i32 2>
  %s2 = shufflevector <4 x float> %l1, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  %a = fadd <2 x float> %s1, %s2
  store <2 x float> %a, <2 x float> *%dst
  ret void
}

define void @vld2_v4f32(<8 x float> *%src, <4 x float> *%dst) {
; CHECK-LABEL: vld2_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x float>, <8 x float>* %src, align 4
  %s1 = shufflevector <8 x float> %l1, <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %s2 = shufflevector <8 x float> %l1, <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %a = fadd <4 x float> %s1, %s2
  store <4 x float> %a, <4 x float> *%dst
  ret void
}

define void @vld2_v8f32(<16 x float> *%src, <8 x float> *%dst) {
; CHECK-LABEL: vld2_v8f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]!
; CHECK-NEXT:    vld20.32 {q2, q3}, [r0]
; CHECK-NEXT:    vadd.f32 q0, q0, q1
; CHECK-NEXT:    vld21.32 {q2, q3}, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vadd.f32 q1, q2, q3
; CHECK-NEXT:    vstrw.32 q1, [r1, #16]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <16 x float>, <16 x float>* %src, align 4
  %s1 = shufflevector <16 x float> %l1, <16 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %s2 = shufflevector <16 x float> %l1, <16 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %a = fadd <8 x float> %s1, %s2
  store <8 x float> %a, <8 x float> *%dst
  ret void
}

define void @vld2_v16f32(<32 x float> *%src, <16 x float> *%dst) {
; CHECK-LABEL: vld2_v16f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    add.w r2, r0, #96
; CHECK-NEXT:    add.w r3, r0, #64
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]!
; CHECK-NEXT:    vadd.f32 q0, q0, q1
; CHECK-NEXT:    vld20.32 {q1, q2}, [r3]
; CHECK-NEXT:    vld20.32 {q3, q4}, [r2]
; CHECK-NEXT:    vld20.32 {q5, q6}, [r0]
; CHECK-NEXT:    vld21.32 {q5, q6}, [r0]
; CHECK-NEXT:    vld21.32 {q1, q2}, [r3]
; CHECK-NEXT:    vld21.32 {q3, q4}, [r2]
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q1_q2
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vadd.f32 q5, q5, q6
; CHECK-NEXT:    vadd.f32 q1, q1, q2
; CHECK-NEXT:    vadd.f32 q3, q3, q4
; CHECK-NEXT:    vstrw.32 q1, [r1, #32]
; CHECK-NEXT:    vstrw.32 q3, [r1, #48]
; CHECK-NEXT:    vstrw.32 q5, [r1, #16]
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <32 x float>, <32 x float>* %src, align 4
  %s1 = shufflevector <32 x float> %l1, <32 x float> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %s2 = shufflevector <32 x float> %l1, <32 x float> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %a = fadd <16 x float> %s1, %s2
  store <16 x float> %a, <16 x float> *%dst
  ret void
}

; f16

define void @vld2_v2f16(<4 x half> *%src, <2 x half> *%dst) {
; CHECK-LABEL: vld2_v2f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrd r2, r0, [r0]
; CHECK-NEXT:    vmov.32 q0[0], r2
; CHECK-NEXT:    vmov.32 q0[1], r0
; CHECK-NEXT:    vmovx.f16 s4, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmovx.f16 s4, s0
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    vmov.16 q1[0], r2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q0[0], r2
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vadd.f16 q0, q0, q1
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    str r0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x half>, <4 x half>* %src, align 4
  %s1 = shufflevector <4 x half> %l1, <4 x half> undef, <2 x i32> <i32 0, i32 2>
  %s2 = shufflevector <4 x half> %l1, <4 x half> undef, <2 x i32> <i32 1, i32 3>
  %a = fadd <2 x half> %s1, %s2
  store <2 x half> %a, <2 x half> *%dst
  ret void
}

define void @vld2_v4f16(<8 x half> *%src, <4 x half> *%dst) {
; CHECK-LABEL: vld2_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vmovx.f16 s8, s0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q1[0], r2
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov.16 q1[2], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmovx.f16 s12, s2
; CHECK-NEXT:    vmov r2, s8
; CHECK-NEXT:    vmov.16 q2[0], r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    vmov.16 q2[1], r2
; CHECK-NEXT:    vmovx.f16 s12, s3
; CHECK-NEXT:    vmov.16 q2[2], r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    vmov.16 q2[3], r0
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vadd.f16 q0, q1, q2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    strd r0, r2, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x half>, <8 x half>* %src, align 4
  %s1 = shufflevector <8 x half> %l1, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %s2 = shufflevector <8 x half> %l1, <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %a = fadd <4 x half> %s1, %s2
  store <4 x half> %a, <4 x half> *%dst
  ret void
}

define void @vld2_v8f16(<16 x half> *%src, <8 x half> *%dst) {
; CHECK-LABEL: vld2_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.16 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.16 {q0, q1}, [r0]
; CHECK-NEXT:    vadd.f16 q0, q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <16 x half>, <16 x half>* %src, align 4
  %s1 = shufflevector <16 x half> %l1, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %s2 = shufflevector <16 x half> %l1, <16 x half> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %a = fadd <8 x half> %s1, %s2
  store <8 x half> %a, <8 x half> *%dst
  ret void
}

define void @vld2_v16f16(<32 x half> *%src, <16 x half> *%dst) {
; CHECK-LABEL: vld2_v16f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld20.16 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.16 {q0, q1}, [r0]!
; CHECK-NEXT:    vld20.16 {q2, q3}, [r0]
; CHECK-NEXT:    vadd.f16 q0, q0, q1
; CHECK-NEXT:    vld21.16 {q2, q3}, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vadd.f16 q2, q2, q3
; CHECK-NEXT:    vstrw.32 q2, [r1, #16]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <32 x half>, <32 x half>* %src, align 4
  %s1 = shufflevector <32 x half> %l1, <32 x half> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %s2 = shufflevector <32 x half> %l1, <32 x half> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %a = fadd <16 x half> %s1, %s2
  store <16 x half> %a, <16 x half> *%dst
  ret void
}

; f64

define void @vld2_v2f64(<4 x double> *%src, <2 x double> *%dst) {
; CHECK-LABEL: vld2_v2f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vadd.f64 d1, d0, d1
; CHECK-NEXT:    vadd.f64 d0, d2, d3
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x double>, <4 x double>* %src, align 4
  %s1 = shufflevector <4 x double> %l1, <4 x double> undef, <2 x i32> <i32 0, i32 2>
  %s2 = shufflevector <4 x double> %l1, <4 x double> undef, <2 x i32> <i32 1, i32 3>
  %a = fadd <2 x double> %s1, %s2
  store <2 x double> %a, <2 x double> *%dst
  ret void
}

define void @vld2_v4f64(<8 x double> *%src, <4 x double> *%dst) {
; CHECK-LABEL: vld2_v4f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #48]
; CHECK-NEXT:    vldrw.u32 q1, [r0, #32]
; CHECK-NEXT:    vldrw.u32 q2, [r0]
; CHECK-NEXT:    vadd.f64 d1, d0, d1
; CHECK-NEXT:    vadd.f64 d0, d2, d3
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vadd.f64 d3, d2, d3
; CHECK-NEXT:    vstrw.32 q0, [r1, #16]
; CHECK-NEXT:    vadd.f64 d2, d4, d5
; CHECK-NEXT:    vstrw.32 q1, [r1]
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x double>, <8 x double>* %src, align 4
  %s1 = shufflevector <8 x double> %l1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %s2 = shufflevector <8 x double> %l1, <8 x double> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %a = fadd <4 x double> %s1, %s2
  store <4 x double> %a, <4 x double> *%dst
  ret void
}