Lines Matching +full:- +full:i
1 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
2 ; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
5 ; CHECK-LABEL: test_vmull_high_n_s16:
6 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
7 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
8 ; CHECK-NEXT: ret
10 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
11 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
12 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
13 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
14 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
15 …%vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %ve…
16 ret <4 x i32> %vmull15.i.i
20 ; CHECK-LABEL: test_vmull_high_n_s16_imm:
21 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
22 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
23 ; CHECK-NEXT: ret
25 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
26 …%vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i1…
27 ret <4 x i32> %vmull15.i.i
31 ; CHECK-LABEL: test_vmull_high_n_s32:
32 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
33 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
34 ; CHECK-NEXT: ret
36 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
37 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
38 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
39 …%vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vec…
40 ret <2 x i64> %vmull9.i.i
44 ; CHECK-LABEL: test_vmull_high_n_s32_imm:
45 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #1, msl #8
46 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
47 ; CHECK-NEXT: ret
49 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
50 …%vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32…
51 ret <2 x i64> %vmull9.i.i
55 ; CHECK-LABEL: test_vmull_high_n_u16:
56 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
57 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
58 ; CHECK-NEXT: ret
60 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
61 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
62 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
63 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
64 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
65 …%vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %ve…
66 ret <4 x i32> %vmull15.i.i
70 ; CHECK-LABEL: test_vmull_high_n_u16_imm:
71 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #17, lsl #8
72 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
73 ; CHECK-NEXT: ret
75 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
76 …%vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i1…
77 ret <4 x i32> %vmull15.i.i
81 ; CHECK-LABEL: test_vmull_high_n_u32:
82 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
83 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
84 ; CHECK-NEXT: ret
86 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
87 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
88 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
89 …%vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vec…
90 ret <2 x i64> %vmull9.i.i
94 ; CHECK-LABEL: test_vmull_high_n_u32_imm:
95 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #1, msl #8
96 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
97 ; CHECK-NEXT: ret
99 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
100 …%vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32…
101 ret <2 x i64> %vmull9.i.i
105 ; CHECK-LABEL: test_vqdmull_high_n_s16:
106 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
107 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
108 ; CHECK-NEXT: ret
110 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
111 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
112 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
113 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
114 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
115 …%vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16>…
116 ret <4 x i32> %vqdmull15.i.i
120 ; CHECK-LABEL: test_vqdmull_high_n_s16_imm:
121 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #17, lsl #8
122 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
123 ; CHECK-NEXT: ret
125 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
126 …%vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16>…
127 ret <4 x i32> %vqdmull15.i.i
131 ; CHECK-LABEL: test_vqdmull_high_n_s32:
132 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
133 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
134 ; CHECK-NEXT: ret
136 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
137 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
138 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
139 …%vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> …
140 ret <2 x i64> %vqdmull9.i.i
144 ; CHECK-LABEL: test_vqdmull_high_n_s32_imm:
145 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
146 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
147 ; CHECK-NEXT: ret
149 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
150 …%vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> …
151 ret <2 x i64> %vqdmull9.i.i
155 ; CHECK-LABEL: test_vmlal_high_n_s16:
156 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
157 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
158 ; CHECK-NEXT: ret
160 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
161 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
162 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
163 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
164 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
165 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %v…
166 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
167 ret <4 x i32> %add.i.i
171 ; CHECK-LABEL: test_vmlal_high_n_s16_imm:
172 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
173 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
174 ; CHECK-NEXT: ret
176 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
177 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i…
178 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
179 ret <4 x i32> %add.i.i
183 ; CHECK-LABEL: test_vmlal_high_n_s32:
184 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
185 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
186 ; CHECK-NEXT: ret
188 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
189 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
190 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
191 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %v…
192 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
193 ret <2 x i64> %add.i.i
197 ; CHECK-LABEL: test_vmlal_high_n_s32_imm:
198 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
199 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
200 ; CHECK-NEXT: ret
202 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
203 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i…
204 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
205 ret <2 x i64> %add.i.i
209 ; CHECK-LABEL: test_vmlal_high_n_u16:
210 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
211 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
212 ; CHECK-NEXT: ret
214 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
215 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
216 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
217 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
218 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
219 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %v…
220 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
221 ret <4 x i32> %add.i.i
225 ; CHECK-LABEL: test_vmlal_high_n_u16_imm:
226 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
227 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
228 ; CHECK-NEXT: ret
230 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
231 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i…
232 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
233 ret <4 x i32> %add.i.i
237 ; CHECK-LABEL: test_vmlal_high_n_u32:
238 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
239 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
240 ; CHECK-NEXT: ret
242 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
243 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
244 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
245 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %v…
246 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
247 ret <2 x i64> %add.i.i
251 ; CHECK-LABEL: test_vmlal_high_n_u32_imm:
252 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
253 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
254 ; CHECK-NEXT: ret
256 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
257 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i…
258 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
259 ret <2 x i64> %add.i.i
263 ; CHECK-LABEL: test_vqdmlal_high_n_s16:
264 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
265 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
266 ; CHECK-NEXT: ret
268 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
269 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
270 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
271 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
272 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
273 …%vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16>…
274 …%vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.…
275 ret <4 x i32> %vqdmlal17.i.i
279 ; CHECK-LABEL: test_vqdmlal_high_n_s16_imm:
280 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
281 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
282 ; CHECK-NEXT: ret
284 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
285 …%vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16>…
286 …%vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.…
287 ret <4 x i32> %vqdmlal17.i.i
291 ; CHECK-LABEL: test_vqdmlal_high_n_s32:
292 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
293 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
294 ; CHECK-NEXT: ret
296 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
297 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
298 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
299 …%vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> …
300 …%vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i…
301 ret <2 x i64> %vqdmlal11.i.i
305 ; CHECK-LABEL: test_vqdmlal_high_n_s32_imm:
306 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
307 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
308 ; CHECK-NEXT: ret
310 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
311 …%vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> …
312 …%vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i…
313 ret <2 x i64> %vqdmlal11.i.i
317 ; CHECK-LABEL: test_vmlsl_high_n_s16:
318 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
319 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
320 ; CHECK-NEXT: ret
322 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
323 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
324 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
325 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
326 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
327 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %v…
328 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
329 ret <4 x i32> %sub.i.i
333 ; CHECK-LABEL: test_vmlsl_high_n_s16_imm:
334 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
335 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
336 ; CHECK-NEXT: ret
338 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
339 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i…
340 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
341 ret <4 x i32> %sub.i.i
345 ; CHECK-LABEL: test_vmlsl_high_n_s32:
346 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
347 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
348 ; CHECK-NEXT: ret
350 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
351 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
352 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
353 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %v…
354 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
355 ret <2 x i64> %sub.i.i
359 ; CHECK-LABEL: test_vmlsl_high_n_s32_imm:
360 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
361 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
362 ; CHECK-NEXT: ret
364 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
365 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i…
366 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
367 ret <2 x i64> %sub.i.i
371 ; CHECK-LABEL: test_vmlsl_high_n_u16:
372 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
373 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
374 ; CHECK-NEXT: ret
376 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
377 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
378 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
379 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
380 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
381 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %v…
382 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
383 ret <4 x i32> %sub.i.i
387 ; CHECK-LABEL: test_vmlsl_high_n_u16_imm:
388 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
389 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
390 ; CHECK-NEXT: ret
392 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
393 …%vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i…
394 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
395 ret <4 x i32> %sub.i.i
399 ; CHECK-LABEL: test_vmlsl_high_n_u32:
400 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
401 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
402 ; CHECK-NEXT: ret
404 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
405 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
406 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
407 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %v…
408 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
409 ret <2 x i64> %sub.i.i
413 ; CHECK-LABEL: test_vmlsl_high_n_u32_imm:
414 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
415 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
416 ; CHECK-NEXT: ret
418 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
419 …%vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i…
420 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
421 ret <2 x i64> %sub.i.i
425 ; CHECK-LABEL: test_vqdmlsl_high_n_s16:
426 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
427 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
428 ; CHECK-NEXT: ret
430 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
431 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
432 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
433 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
434 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
435 …%vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16>…
436 …%vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.…
437 ret <4 x i32> %vqdmlsl17.i.i
441 ; CHECK-LABEL: test_vqdmlsl_high_n_s16_imm:
442 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
443 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
444 ; CHECK-NEXT: ret
446 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
447 …%vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16>…
448 …%vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.…
449 ret <4 x i32> %vqdmlsl17.i.i
453 ; CHECK-LABEL: test_vqdmlsl_high_n_s32:
454 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
455 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
456 ; CHECK-NEXT: ret
458 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
459 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
460 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
461 …%vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> …
462 …%vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i…
463 ret <2 x i64> %vqdmlsl11.i.i
467 ; CHECK-LABEL: test_vqdmlsl_high_n_s32_imm:
468 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
469 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
470 ; CHECK-NEXT: ret
472 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
473 …%vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> …
474 …%vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i…
475 ret <2 x i64> %vqdmlsl11.i.i
479 ; CHECK-LABEL: test_vmul_n_f32:
480 ; CHECK-NEXT: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
481 ; CHECK-NEXT: ret
483 %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
484 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
485 %mul.i = fmul <2 x float> %vecinit1.i, %a
486 ret <2 x float> %mul.i
490 ; CHECK-LABEL: test_vmulq_n_f32:
491 ; CHECK-NEXT: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
492 ; CHECK-NEXT: ret
494 %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
495 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
496 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
497 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
498 %mul.i = fmul <4 x float> %vecinit3.i, %a
499 ret <4 x float> %mul.i
503 ; CHECK-LABEL: test_vmulq_n_f64:
504 ; CHECK-NEXT: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
505 ; CHECK-NEXT: ret
507 %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
508 %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
509 %mul.i = fmul <2 x double> %vecinit1.i, %a
510 ret <2 x double> %mul.i
514 ; CHECK-LABEL: test_vfma_n_f32:
515 ; CHECK-NEXT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
516 ; CHECK-NEXT: ret
518 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
519 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
520 %0 = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
525 ; CHECK-LABEL: test_vfmaq_n_f32:
526 ; CHECK-NEXT: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
527 ; CHECK-NEXT: ret
529 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
530 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
531 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
532 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
533 %0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
538 ; CHECK-LABEL: test_vfms_n_f32:
539 ; CHECK-NEXT: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
540 ; CHECK-NEXT: ret
542 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
543 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
544 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
545 %1 = call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
550 ; CHECK-LABEL: test_vfmsq_n_f32:
551 ; CHECK-NEXT: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
552 ; CHECK-NEXT: ret
554 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
555 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
556 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
557 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
558 …%0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000…
559 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)