// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>
6 
// vmla_n_f32: the expected IR places %c in both lanes of a <2 x float>,
// multiplies that vector by %b and adds the product to %a.
// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}
16 
// vmlaq_n_f32: the expected IR places %c in all four lanes of a <4 x float>,
// multiplies that vector by %b and adds the product to %a.
// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}
28 
// vmlaq_n_f64: the expected IR places %c in both lanes of a <2 x double>,
// multiplies that vector by %b and adds the product to %a.
// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]]
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vmlaq_n_f64(a, b, c);
}
38 
// vmlsq_n_f32: the expected IR places %c in all four lanes of a <4 x float>,
// multiplies that vector by %b and subtracts the product from %a.
// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}
50 
// vmls_n_f32: the expected IR places %c in both lanes of a <2 x float>,
// multiplies that vector by %b and subtracts the product from %a.
// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}
60 
// vmlsq_n_f64: the expected IR places %c in both lanes of a <2 x double>,
// multiplies that vector by %b and subtracts the product from %a.
// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]]
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vmlsq_n_f64(a, b, c);
}
70 
// vmla_lane_f32 with lane 0: the expected IR broadcasts element 0 of the
// <2 x float> %v, multiplies it by %b and adds the product to %a.
// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmla_lane_f32(a, b, v, 0);
}
79 
// vmlaq_lane_f32 with lane 0: the expected IR broadcasts element 0 of the
// <2 x float> %v into a <4 x float>, multiplies it by %b and adds the product
// to %a.
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlaq_lane_f32(a, b, v, 0);
}
88 
// vmla_laneq_f32 with lane 0: the expected IR broadcasts element 0 of the
// <4 x float> %v into a <2 x float>, multiplies it by %b and adds the product
// to %a.
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[ADD]]
float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmla_laneq_f32(a, b, v, 0);
}
97 
// vmlaq_laneq_f32 with lane 0: the expected IR broadcasts element 0 of the
// <4 x float> %v, multiplies it by %b and adds the product to %a.
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlaq_laneq_f32(a, b, v, 0);
}
106 
// vmls_lane_f32 with lane 0: the expected IR broadcasts element 0 of the
// <2 x float> %v, multiplies it by %b and subtracts the product from %a.
// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmls_lane_f32(a, b, v, 0);
}
115 
// vmlsq_lane_f32 with lane 0: the expected IR broadcasts element 0 of the
// <2 x float> %v into a <4 x float>, multiplies it by %b and subtracts the
// product from %a.
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlsq_lane_f32(a, b, v, 0);
}
124 
// vmls_laneq_f32 with lane 0: the expected IR broadcasts element 0 of the
// <4 x float> %v into a <2 x float>, multiplies it by %b and subtracts the
// product from %a.
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[SUB]]
float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmls_laneq_f32(a, b, v, 0);
}
133 
// vmlsq_laneq_f32 with lane 0: the expected IR broadcasts element 0 of the
// <4 x float> %v, multiplies it by %b and subtracts the product from %a.
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlsq_laneq_f32(a, b, v, 0);
}
142 
// vmla_lane_f32 with lane 1: the expected IR broadcasts element 1 of the
// <2 x float> %v, multiplies it by %b and adds the product to %a.
// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmla_lane_f32(a, b, v, 1);
}
151 
// vmlaq_lane_f32 with lane 1: the expected IR broadcasts element 1 of the
// <2 x float> %v into a <4 x float>, multiplies it by %b and adds the product
// to %a.
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlaq_lane_f32(a, b, v, 1);
}
160 
// vmla_laneq_f32 with lane 3: the expected IR broadcasts element 3 of the
// <4 x float> %v into a <2 x float>, multiplies it by %b and adds the product
// to %a.
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[ADD]]
float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmla_laneq_f32(a, b, v, 3);
}
169 
// vmlaq_laneq_f32 with lane 3: the expected IR broadcasts element 3 of the
// <4 x float> %v, multiplies it by %b and adds the product to %a.
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlaq_laneq_f32(a, b, v, 3);
}
178 
// vmls_lane_f32 with lane 1: the expected IR broadcasts element 1 of the
// <2 x float> %v, multiplies it by %b and subtracts the product from %a.
// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmls_lane_f32(a, b, v, 1);
}
187 
// vmlsq_lane_f32 with lane 1: the expected IR broadcasts element 1 of the
// <2 x float> %v into a <4 x float>, multiplies it by %b and subtracts the
// product from %a.
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlsq_lane_f32(a, b, v, 1);
}
// vmls_laneq_f32 with lane 3: the expected IR broadcasts element 3 of the
// <4 x float> %v into a <2 x float>, multiplies it by %b and subtracts the
// product from %a.
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK:   ret <2 x float> [[SUB]]
float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmls_laneq_f32(a, b, v, 3);
}
204 
// vmlsq_laneq_f32 with lane 3: the expected IR broadcasts element 3 of the
// <4 x float> %v, multiplies it by %b and subtracts the product from %a.
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK:   ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlsq_laneq_f32(a, b, v, 3);
}
213 
// vfmaq_n_f64: the expected IR places %c in both lanes of a <2 x double>,
// round-trips all three operands through <16 x i8> bitcasts, and calls
// llvm.fma.v2f64 with %b and the splat as multiplicands and %a as the addend.
// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
// CHECK:   ret <2 x double> [[TMP6]]
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmaq_n_f64(a, b, c);
}
228 
// vfmsq_n_f64: the expected IR first negates %b (fsub from -0.0), places %c
// in both lanes of a <2 x double>, round-trips the operands through
// <16 x i8> bitcasts, and calls llvm.fma.v2f64 with the negated %b and the
// splat as multiplicands and %a as the addend.
// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
// CHECK:   ret <2 x double> [[TMP6]]
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmsq_n_f64(a, b, c);
}
244