; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
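; This file checks that the ARM NEON signed/unsigned integer and floating-point
; max/min intrinsics, plus the AArch64-specific fmaxnm/fminnm intrinsics, are
; lowered to the corresponding AArch64 vector instructions.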

declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_smax_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}
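
; Under AAPCS64 the first two vector arguments are passed in v0 and v1 and the
; result is returned in v0, which is why the patterns above and below expect
; exactly those registers.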

define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umax_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}
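
; The minimum tests mirror the maximum tests above, selecting smin/umin
; instead of smax/umax for each element type and vector width.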

declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_smin_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umin_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}
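
; Floating-point max/min. Unlike fmaxnm/fminnm below, fmax and fmin propagate
; NaNs: if either input element is a NaN, the corresponding result element is
; a NaN.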

declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmax_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmax v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmax_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmax v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmax_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmax v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmin_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmin v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmin_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmin v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmin_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmin v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}
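
; fmaxnm/fminnm follow the IEEE 754-2008 maxNum/minNum semantics: when exactly
; one operand of a pair is a quiet NaN it is treated as missing data, and the
; other, numerical operand is returned.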

declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}