define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
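
; Unsigned integer maximum (UMAX), element-wise.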
define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
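
; Signed integer minimum (SMIN), element-wise.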
define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
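
; Unsigned integer minimum (UMIN), element-wise.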
define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
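
; Signed maximum pairwise (SMAXP): each result element is the maximum of an
; adjacent pair of elements taken from the concatenation of the two operands.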
define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
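
; Unsigned maximum pairwise (UMAXP).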
define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
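
; Signed minimum pairwise (SMINP).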
define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
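
; Unsigned minimum pairwise (UMINP).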
define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
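
; Floating-point maximum (FMAX), element-wise.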
define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
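
; Floating-point maximum pairwise (FMAXP).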
define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
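
; Floating-point minimum (FMIN), element-wise.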
define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
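
; Floating-point minimum pairwise (FMINP).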
define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
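
; Floating-point minimum-number pairwise (FMINNMP). Follows minNum-style
; semantics, so a quiet NaN in one element of a pair yields the other,
; numeric, element rather than NaN.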
define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
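
; Floating-point maximum-number pairwise (FMAXNMP), maxNum-style semantics.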
define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone