; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

; Same as vecreduce-fadd-legalization.ll, but without fmf.

declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
; Single-element f16 reduction with a live start value: one scalar fadd,
; performed in f32 (note the fcvt promotions in the checked output).
define half @test_v1f16(<1 x half> %a, half %s) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcvt s1, h1
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    fcvt h0, s0
; CHECK-NEXT:    ret
  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
  ret half %b
}
27
; Neutral start value (-0.0, the fadd identity): reducing one element is the
; element itself, so no code is needed beyond ret.
define half @test_v1f16_neutral(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
  ret half %b
}
35
; Single-element f32 reduction with a live start value: one scalar fadd.
define float @test_v1f32(<1 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
  ret float %b
}
45
; Neutral start value (-0.0): the single element is returned as-is; only
; register-liveness "kill" annotations remain in the output.
define float @test_v1f32_neutral(<1 x float> %a) nounwind {
; CHECK-LABEL: test_v1f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
  ret float %b
}
55
; Single-element f64 reduction with a live start value: one scalar fadd.
define double @test_v1f64(<1 x double> %a, double %s) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d0, d1, d0
; CHECK-NEXT:    ret
  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
  ret double %b
}
64
; Neutral start value (-0.0): reducing one f64 element folds to a plain ret.
define double @test_v1f64_neutral(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
  ret double %b
}
72
; fp128 addition has no AArch64 instruction: the single fadd becomes a call to
; the __addtf3 libcall, with the operands shuffled into v0/v1 first.
define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    mov v2.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
  ret fp128 %b
}
86
; Neutral start value (fp128 -0.0, spelled as the raw 0xL... bit pattern):
; reducing one element folds to a plain ret, no libcall needed.
define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
  ret fp128 %b
}
94
; Non-power-of-two vector with a live start value: without fmf the reduction
; must stay strictly in order, so it is a scalarized left-to-right fadd chain.
define float @test_v3f32(<3 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s1, s1, s0
; CHECK-NEXT:    mov s2, v0.s[1]
; CHECK-NEXT:    fadd s1, s1, s2
; CHECK-NEXT:    mov s0, v0.s[2]
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
  ret float %b
}
107
; Neutral start value (-0.0): the first two lanes can use faddp, then the
; third lane is added on.
define float @test_v3f32_neutral(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
  ret float %b
}
118
; <5 x float> is passed as scalars in s0-s4 (start value in s5); the strict
; reduction is an in-order chain of scalar fadds.
define float @test_v5f32(<5 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v5f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s0, s5, s0
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    fadd s0, s0, s4
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
  ret float %b
}
131
; Neutral start value (-0.0): the identity is folded away, leaving one fewer
; fadd than the live-start variant above.
define float @test_v5f32_neutral(<5 x float> %a) nounwind {
; CHECK-LABEL: test_v5f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    fadd s0, s0, s4
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
  ret float %b
}
143
; Two fp128 elements chained onto the start value via two __addtf3 libcalls;
; the second element is spilled across the first call.
define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #32 // =32
; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #32 // =32
; CHECK-NEXT:    ret
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
  ret fp128 %b
}
161
; Neutral start value (fp128 -0.0): the identity add is folded, leaving a
; single __addtf3 libcall for the two elements.
define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
  ret fp128 %b
}
172
; 16-element strict reduction with a live start value (in s4): fully
; scalarized into an in-order chain of 16 fadds over the lanes of q0-q3.
define float @test_v16f32(<16 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s22, v0.s[3]
; CHECK-NEXT:    mov s23, v0.s[2]
; CHECK-NEXT:    mov s24, v0.s[1]
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    fadd s0, s0, s24
; CHECK-NEXT:    fadd s0, s0, s23
; CHECK-NEXT:    fadd s0, s0, s22
; CHECK-NEXT:    mov s21, v1.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov s20, v1.s[2]
; CHECK-NEXT:    fadd s0, s0, s21
; CHECK-NEXT:    mov s19, v1.s[3]
; CHECK-NEXT:    fadd s0, s0, s20
; CHECK-NEXT:    fadd s0, s0, s19
; CHECK-NEXT:    mov s18, v2.s[1]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    mov s17, v2.s[2]
; CHECK-NEXT:    fadd s0, s0, s18
; CHECK-NEXT:    mov s16, v2.s[3]
; CHECK-NEXT:    fadd s0, s0, s17
; CHECK-NEXT:    fadd s0, s0, s16
; CHECK-NEXT:    mov s7, v3.s[1]
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    mov s6, v3.s[2]
; CHECK-NEXT:    fadd s0, s0, s7
; CHECK-NEXT:    mov s5, v3.s[3]
; CHECK-NEXT:    fadd s0, s0, s6
; CHECK-NEXT:    fadd s0, s0, s5
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
  ret float %b
}
208
; Neutral start value (-0.0): the first two lanes use faddp, then the
; remaining 14 lanes are chained on with in-order scalar fadds.
define float @test_v16f32_neutral(<16 x float> %a) nounwind {
; CHECK-LABEL: test_v16f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s21, v0.s[3]
; CHECK-NEXT:    mov s22, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s22
; CHECK-NEXT:    fadd s0, s0, s21
; CHECK-NEXT:    mov s20, v1.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov s19, v1.s[2]
; CHECK-NEXT:    fadd s0, s0, s20
; CHECK-NEXT:    mov s18, v1.s[3]
; CHECK-NEXT:    fadd s0, s0, s19
; CHECK-NEXT:    fadd s0, s0, s18
; CHECK-NEXT:    mov s17, v2.s[1]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    mov s16, v2.s[2]
; CHECK-NEXT:    fadd s0, s0, s17
; CHECK-NEXT:    mov s7, v2.s[3]
; CHECK-NEXT:    fadd s0, s0, s16
; CHECK-NEXT:    fadd s0, s0, s7
; CHECK-NEXT:    mov s6, v3.s[1]
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    mov s5, v3.s[2]
; CHECK-NEXT:    fadd s0, s0, s6
; CHECK-NEXT:    mov s4, v3.s[3]
; CHECK-NEXT:    fadd s0, s0, s5
; CHECK-NEXT:    fadd s0, s0, s4
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
  ret float %b
}