; (web-viewer navigation artifact — "Home / Line# / Scopes# / Navigate# / Raw / Download" —
;  commented out so the file remains parseable as an LLVM IR test)
1; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
2; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
4; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
7; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
8; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
9; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
10; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
11; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
12; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
13; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
14; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
15; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
16; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
17
18target triple = "aarch64-unknown-linux-gnu"
19
20; Don't use SVE when its registers are no bigger than NEON.
21; NO_SVE-NOT: ptrue
22
23;
24; SMAX
25;
26
27; Don't use SVE for 64-bit vectors.
28define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; 64-bit vector fits NEON: expect a single NEON smax, no SVE.
29; CHECK-LABEL: smax_v8i8:
30; CHECK: smax v0.8b, v0.8b, v1.8b
31; CHECK-NEXT: ret
32  %res = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
33  ret <8 x i8> %res
34}
35
36; Don't use SVE for 128-bit vectors.
37define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; 128-bit vector fits NEON: expect a single NEON smax, no SVE.
38; CHECK-LABEL: smax_v16i8:
39; CHECK: smax v0.16b, v0.16b, v1.16b
40; CHECK-NEXT: ret
41  %res = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
42  ret <16 x i8> %res
43}
44
45define void @smax_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; 256-bit case: loads both operands under a vl32 predicate, applies predicated
; SVE smax, and stores the result back over %a (in-place update).
46; CHECK-LABEL: smax_v32i8:
47; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
48; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
49; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
50; CHECK-NEXT: smax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
51; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
52; CHECK-NEXT: ret
53  %op1 = load <32 x i8>, <32 x i8>* %a
54  %op2 = load <32 x i8>, <32 x i8>* %b
55  %res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
56  store <32 x i8> %res, <32 x i8>* %a
57  ret void
58}
59
60define void @smax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; Single-vector path when registers are >=512 bits; 256-bit SVE must legalise
; by splitting into two vl32 halves (checked under VBITS_EQ_256).
; NOTE(review): in the VBITS_EQ_256 block the _LO/_HI capture names are swapped
; relative to the addresses they load from ([x0, x[[A]]] vs [x0]) — naming
; only, FileCheck matching is unaffected.
61; CHECK-LABEL: smax_v64i8:
62; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
63; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
64; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
65; VBITS_GE_512-NEXT: smax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
66; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
67; VBITS_GE_512-NEXT: ret
68;
69; Ensure sensible type legalisation.
70; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
71; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
72; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
73; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0]
74; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
75; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1]
76; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
77; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
78; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]]
79; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0]
80; VBITS_EQ_256-NEXT: ret
81  %op1 = load <64 x i8>, <64 x i8>* %a
82  %op2 = load <64 x i8>, <64 x i8>* %b
83  %res = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
84  store <64 x i8> %res, <64 x i8>* %a
85  ret void
86}
87
88define void @smax_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; Checked only when SVE registers are wide enough (>=1024 bits) to hold the
; whole 128-byte vector in one register.
89; CHECK-LABEL: smax_v128i8:
90; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
91; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
92; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
93; VBITS_GE_1024-NEXT: smax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
94; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
95; VBITS_GE_1024-NEXT: ret
96  %op1 = load <128 x i8>, <128 x i8>* %a
97  %op2 = load <128 x i8>, <128 x i8>* %b
98  %res = call <128 x i8> @llvm.smax.v128i8(<128 x i8> %op1, <128 x i8> %op2)
99  store <128 x i8> %res, <128 x i8>* %a
100  ret void
101}
102
103define void @smax_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; Largest case: single-register path only exists at the 2048-bit maximum.
104; CHECK-LABEL: smax_v256i8:
105; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
106; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
107; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
108; VBITS_GE_2048-NEXT: smax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
109; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
110; VBITS_GE_2048-NEXT: ret
111  %op1 = load <256 x i8>, <256 x i8>* %a
112  %op2 = load <256 x i8>, <256 x i8>* %b
113  %res = call <256 x i8> @llvm.smax.v256i8(<256 x i8> %op1, <256 x i8> %op2)
114  store <256 x i8> %res, <256 x i8>* %a
115  ret void
116}
117
118; Don't use SVE for 64-bit vectors.
119define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; 64-bit vector fits NEON: expect a single NEON smax, no SVE.
120; CHECK-LABEL: smax_v4i16:
121; CHECK: smax v0.4h, v0.4h, v1.4h
122; CHECK-NEXT: ret
123  %res = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
124  ret <4 x i16> %res
125}
126
127; Don't use SVE for 128-bit vectors.
128define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; 128-bit vector fits NEON: expect a single NEON smax, no SVE.
129; CHECK-LABEL: smax_v8i16:
130; CHECK: smax v0.8h, v0.8h, v1.8h
131; CHECK-NEXT: ret
132  %res = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
133  ret <8 x i16> %res
134}
135
136define void @smax_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; 256-bit case: predicated SVE smax over 16 halfwords, stored back over %a.
137; CHECK-LABEL: smax_v16i16:
138; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
139; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
140; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
141; CHECK-NEXT: smax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
142; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
143; CHECK-NEXT: ret
144  %op1 = load <16 x i16>, <16 x i16>* %a
145  %op2 = load <16 x i16>, <16 x i16>* %b
146  %res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
147  store <16 x i16> %res, <16 x i16>* %a
148  ret void
149}
150
151define void @smax_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; Single-vector path when registers are >=512 bits; 256-bit SVE splits into
; two vl16 halves addressed via an add of #32 bytes (checked under VBITS_EQ_256).
152; CHECK-LABEL: smax_v32i16:
153; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
154; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
155; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
156; VBITS_GE_512-NEXT: smax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
157; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
158; VBITS_GE_512-NEXT: ret
159
160; Ensure sensible type legalisation.
161; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
162; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
163; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
164; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
165; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
166; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
167; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
168; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
169; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
170; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
171; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
172; VBITS_EQ_256-NEXT: ret
173  %op1 = load <32 x i16>, <32 x i16>* %a
174  %op2 = load <32 x i16>, <32 x i16>* %b
175  %res = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
176  store <32 x i16> %res, <32 x i16>* %a
177  ret void
178}
179
180define void @smax_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; Checked only when SVE registers are >=1024 bits (vl64 halfword predicate).
181; CHECK-LABEL: smax_v64i16:
182; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
183; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
184; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
185; VBITS_GE_1024-NEXT: smax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
186; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
187; VBITS_GE_1024-NEXT: ret
188  %op1 = load <64 x i16>, <64 x i16>* %a
189  %op2 = load <64 x i16>, <64 x i16>* %b
190  %res = call <64 x i16> @llvm.smax.v64i16(<64 x i16> %op1, <64 x i16> %op2)
191  store <64 x i16> %res, <64 x i16>* %a
192  ret void
193}
194
195define void @smax_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; Checked only at the 2048-bit maximum (vl128 halfword predicate).
196; CHECK-LABEL: smax_v128i16:
197; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
198; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
199; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
200; VBITS_GE_2048-NEXT: smax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
201; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
202; VBITS_GE_2048-NEXT: ret
203  %op1 = load <128 x i16>, <128 x i16>* %a
204  %op2 = load <128 x i16>, <128 x i16>* %b
205  %res = call <128 x i16> @llvm.smax.v128i16(<128 x i16> %op1, <128 x i16> %op2)
206  store <128 x i16> %res, <128 x i16>* %a
207  ret void
208}
209
210; Don't use SVE for 64-bit vectors.
211define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; 64-bit vector fits NEON: expect a single NEON smax, no SVE.
212; CHECK-LABEL: smax_v2i32:
213; CHECK: smax v0.2s, v0.2s, v1.2s
214; CHECK-NEXT: ret
215  %res = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
216  ret <2 x i32> %res
217}
218
219; Don't use SVE for 128-bit vectors.
220define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; 128-bit vector fits NEON: expect a single NEON smax, no SVE.
221; CHECK-LABEL: smax_v4i32:
222; CHECK: smax v0.4s, v0.4s, v1.4s
223; CHECK-NEXT: ret
224  %res = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
225  ret <4 x i32> %res
226}
227
228define void @smax_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; 256-bit case: predicated SVE smax over 8 words, stored back over %a.
229; CHECK-LABEL: smax_v8i32:
230; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
231; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
232; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
233; CHECK-NEXT: smax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
234; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
235; CHECK-NEXT: ret
236  %op1 = load <8 x i32>, <8 x i32>* %a
237  %op2 = load <8 x i32>, <8 x i32>* %b
238  %res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
239  store <8 x i32> %res, <8 x i32>* %a
240  ret void
241}
242
243define void @smax_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; Single-vector path when registers are >=512 bits; 256-bit SVE splits into
; two vl8 halves addressed via an add of #32 bytes (checked under VBITS_EQ_256).
244; CHECK-LABEL: smax_v16i32:
245; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
246; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
247; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
248; VBITS_GE_512-NEXT: smax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
249; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
250; VBITS_GE_512-NEXT: ret
251
252; Ensure sensible type legalisation.
253; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
254; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
255; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
256; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
257; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
258; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
259; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
260; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
261; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
262; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
263; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
264; VBITS_EQ_256-NEXT: ret
265  %op1 = load <16 x i32>, <16 x i32>* %a
266  %op2 = load <16 x i32>, <16 x i32>* %b
267  %res = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
268  store <16 x i32> %res, <16 x i32>* %a
269  ret void
270}
271
272define void @smax_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; Checked only when SVE registers are >=1024 bits (vl32 word predicate).
273; CHECK-LABEL: smax_v32i32:
274; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
275; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
276; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
277; VBITS_GE_1024-NEXT: smax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
278; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
279; VBITS_GE_1024-NEXT: ret
280  %op1 = load <32 x i32>, <32 x i32>* %a
281  %op2 = load <32 x i32>, <32 x i32>* %b
282  %res = call <32 x i32> @llvm.smax.v32i32(<32 x i32> %op1, <32 x i32> %op2)
283  store <32 x i32> %res, <32 x i32>* %a
284  ret void
285}
286
287define void @smax_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; Checked only at the 2048-bit maximum (vl64 word predicate).
288; CHECK-LABEL: smax_v64i32:
289; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
290; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
291; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
292; VBITS_GE_2048-NEXT: smax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
293; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
294; VBITS_GE_2048-NEXT: ret
295  %op1 = load <64 x i32>, <64 x i32>* %a
296  %op2 = load <64 x i32>, <64 x i32>* %b
297  %res = call <64 x i32> @llvm.smax.v64i32(<64 x i32> %op1, <64 x i32> %op2)
298  store <64 x i32> %res, <64 x i32>* %a
299  ret void
300}
301
302; Vector i64 max are not legal for NEON so use SVE when available.
303define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; No NEON i64 smax exists, so even this NEON-sized vector uses SVE (vl1).
304; CHECK-LABEL: smax_v1i64:
305; CHECK: ptrue [[PG:p[0-9]+]].d, vl1
306; CHECK-NEXT: smax z0.d, [[PG]]/m, z0.d, z1.d
307; CHECK-NEXT: ret
308  %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
309  ret <1 x i64> %res
310}
311
312; Vector i64 max are not legal for NEON so use SVE when available.
313define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; No NEON i64 smax exists, so even this NEON-sized vector uses SVE (vl2).
314; CHECK-LABEL: smax_v2i64:
315; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
316; CHECK-NEXT: smax z0.d, [[PG]]/m, z0.d, z1.d
317; CHECK-NEXT: ret
318  %res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
319  ret <2 x i64> %res
320}
321
322define void @smax_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; 256-bit case: predicated SVE smax over 4 doublewords, stored back over %a.
323; CHECK-LABEL: smax_v4i64:
324; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
325; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
326; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
327; CHECK-NEXT: smax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
328; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
329; CHECK-NEXT: ret
330  %op1 = load <4 x i64>, <4 x i64>* %a
331  %op2 = load <4 x i64>, <4 x i64>* %b
332  %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
333  store <4 x i64> %res, <4 x i64>* %a
334  ret void
335}
336
337define void @smax_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; Single-vector path when registers are >=512 bits; 256-bit SVE splits into
; two vl4 halves addressed via an add of #32 bytes (checked under VBITS_EQ_256).
338; CHECK-LABEL: smax_v8i64:
339; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
340; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
341; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
342; VBITS_GE_512-NEXT: smax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
343; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
344; VBITS_GE_512-NEXT: ret
345
346; Ensure sensible type legalisation.
347; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
348; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
349; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
350; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
351; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
352; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
353; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
354; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
355; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
356; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
357; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
358; VBITS_EQ_256-NEXT: ret
359  %op1 = load <8 x i64>, <8 x i64>* %a
360  %op2 = load <8 x i64>, <8 x i64>* %b
361  %res = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
362  store <8 x i64> %res, <8 x i64>* %a
363  ret void
364}
365
366define void @smax_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; Checked only when SVE registers are >=1024 bits (vl16 doubleword predicate).
367; CHECK-LABEL: smax_v16i64:
368; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
369; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
370; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
371; VBITS_GE_1024-NEXT: smax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
372; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
373; VBITS_GE_1024-NEXT: ret
374  %op1 = load <16 x i64>, <16 x i64>* %a
375  %op2 = load <16 x i64>, <16 x i64>* %b
376  %res = call <16 x i64> @llvm.smax.v16i64(<16 x i64> %op1, <16 x i64> %op2)
377  store <16 x i64> %res, <16 x i64>* %a
378  ret void
379}
380
381define void @smax_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; Checked only at the 2048-bit maximum (vl32 doubleword predicate).
382; CHECK-LABEL: smax_v32i64:
383; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
384; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
385; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
386; VBITS_GE_2048-NEXT: smax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
387; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
388; VBITS_GE_2048-NEXT: ret
389  %op1 = load <32 x i64>, <32 x i64>* %a
390  %op2 = load <32 x i64>, <32 x i64>* %b
391  %res = call <32 x i64> @llvm.smax.v32i64(<32 x i64> %op1, <32 x i64> %op2)
392  store <32 x i64> %res, <32 x i64>* %a
393  ret void
394}
395
396;
397; SMIN
398;
399
400; Don't use SVE for 64-bit vectors.
401define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; 64-bit vector fits NEON: expect a single NEON smin, no SVE.
402; CHECK-LABEL: smin_v8i8:
403; CHECK: smin v0.8b, v0.8b, v1.8b
404; CHECK-NEXT: ret
405  %res = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
406  ret <8 x i8> %res
407}
408
409; Don't use SVE for 128-bit vectors.
410define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; 128-bit vector fits NEON: expect a single NEON smin, no SVE.
411; CHECK-LABEL: smin_v16i8:
412; CHECK: smin v0.16b, v0.16b, v1.16b
413; CHECK-NEXT: ret
414  %res = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
415  ret <16 x i8> %res
416}
417
418define void @smin_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; 256-bit case: predicated SVE smin over 32 bytes, stored back over %a.
419; CHECK-LABEL: smin_v32i8:
420; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
421; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
422; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
423; CHECK-NEXT: smin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
424; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
425; CHECK-NEXT: ret
426  %op1 = load <32 x i8>, <32 x i8>* %a
427  %op2 = load <32 x i8>, <32 x i8>* %b
428  %res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
429  store <32 x i8> %res, <32 x i8>* %a
430  ret void
431}
432
433define void @smin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; Single-vector path when registers are >=512 bits; 256-bit SVE splits into
; two vl32 halves (checked under VBITS_EQ_256). The trailing ret check was
; missing relative to the parallel smax_v64i8 test; added for consistency.
434; CHECK-LABEL: smin_v64i8:
435; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
436; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
437; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
438; VBITS_GE_512-NEXT: smin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
439; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
440; VBITS_GE_512-NEXT: ret
441;
442; Ensure sensible type legalisation.
443; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
444; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
445; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
446; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0]
447; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
448; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1]
449; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
450; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
451; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]]
452; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0]
; VBITS_EQ_256-NEXT: ret
453  %op1 = load <64 x i8>, <64 x i8>* %a
454  %op2 = load <64 x i8>, <64 x i8>* %b
455  %res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
456  store <64 x i8> %res, <64 x i8>* %a
457  ret void
458}
459
460define void @smin_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; Checked only when SVE registers are >=1024 bits (vl128 byte predicate).
461; CHECK-LABEL: smin_v128i8:
462; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
463; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
464; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
465; VBITS_GE_1024-NEXT: smin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
466; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
467; VBITS_GE_1024-NEXT: ret
468  %op1 = load <128 x i8>, <128 x i8>* %a
469  %op2 = load <128 x i8>, <128 x i8>* %b
470  %res = call <128 x i8> @llvm.smin.v128i8(<128 x i8> %op1, <128 x i8> %op2)
471  store <128 x i8> %res, <128 x i8>* %a
472  ret void
473}
474
475define void @smin_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; Checked only at the 2048-bit maximum (vl256 byte predicate).
476; CHECK-LABEL: smin_v256i8:
477; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
478; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
479; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
480; VBITS_GE_2048-NEXT: smin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
481; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
482; VBITS_GE_2048-NEXT: ret
483  %op1 = load <256 x i8>, <256 x i8>* %a
484  %op2 = load <256 x i8>, <256 x i8>* %b
485  %res = call <256 x i8> @llvm.smin.v256i8(<256 x i8> %op1, <256 x i8> %op2)
486  store <256 x i8> %res, <256 x i8>* %a
487  ret void
488}
489
490; Don't use SVE for 64-bit vectors.
491define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; 64-bit vector fits NEON: expect a single NEON smin, no SVE.
492; CHECK-LABEL: smin_v4i16:
493; CHECK: smin v0.4h, v0.4h, v1.4h
494; CHECK-NEXT: ret
495  %res = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
496  ret <4 x i16> %res
497}
498
499; Don't use SVE for 128-bit vectors.
500define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; 128-bit vector fits NEON: expect a single NEON smin, no SVE.
501; CHECK-LABEL: smin_v8i16:
502; CHECK: smin v0.8h, v0.8h, v1.8h
503; CHECK-NEXT: ret
504  %res = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
505  ret <8 x i16> %res
506}
507
508define void @smin_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; 256-bit case: predicated SVE smin over 16 halfwords, stored back over %a.
509; CHECK-LABEL: smin_v16i16:
510; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
511; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
512; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
513; CHECK-NEXT: smin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
514; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
515; CHECK-NEXT: ret
516  %op1 = load <16 x i16>, <16 x i16>* %a
517  %op2 = load <16 x i16>, <16 x i16>* %b
518  %res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
519  store <16 x i16> %res, <16 x i16>* %a
520  ret void
521}
522
523define void @smin_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; Single-vector path when registers are >=512 bits; 256-bit SVE splits into
; two vl16 halves addressed via an add of #32 bytes (checked under VBITS_EQ_256).
524; CHECK-LABEL: smin_v32i16:
525; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
526; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
527; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
528; VBITS_GE_512-NEXT: smin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
529; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
530; VBITS_GE_512-NEXT: ret
531
532; Ensure sensible type legalisation.
533; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
534; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
535; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
536; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
537; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
538; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
539; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
540; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
541; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
542; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
543; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
544; VBITS_EQ_256-NEXT: ret
545  %op1 = load <32 x i16>, <32 x i16>* %a
546  %op2 = load <32 x i16>, <32 x i16>* %b
547  %res = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
548  store <32 x i16> %res, <32 x i16>* %a
549  ret void
550}
551
552define void @smin_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; Checked only when SVE registers are >=1024 bits (vl64 halfword predicate).
553; CHECK-LABEL: smin_v64i16:
554; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
555; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
556; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
557; VBITS_GE_1024-NEXT: smin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
558; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
559; VBITS_GE_1024-NEXT: ret
560  %op1 = load <64 x i16>, <64 x i16>* %a
561  %op2 = load <64 x i16>, <64 x i16>* %b
562  %res = call <64 x i16> @llvm.smin.v64i16(<64 x i16> %op1, <64 x i16> %op2)
563  store <64 x i16> %res, <64 x i16>* %a
564  ret void
565}
566
567define void @smin_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; Checked only at the 2048-bit maximum (vl128 halfword predicate).
568; CHECK-LABEL: smin_v128i16:
569; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
570; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
571; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
572; VBITS_GE_2048-NEXT: smin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
573; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
574; VBITS_GE_2048-NEXT: ret
575  %op1 = load <128 x i16>, <128 x i16>* %a
576  %op2 = load <128 x i16>, <128 x i16>* %b
577  %res = call <128 x i16> @llvm.smin.v128i16(<128 x i16> %op1, <128 x i16> %op2)
578  store <128 x i16> %res, <128 x i16>* %a
579  ret void
580}
581
582; Don't use SVE for 64-bit vectors.
583define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; 64-bit vector fits NEON: expect a single NEON smin, no SVE.
584; CHECK-LABEL: smin_v2i32:
585; CHECK: smin v0.2s, v0.2s, v1.2s
586; CHECK-NEXT: ret
587  %res = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
588  ret <2 x i32> %res
589}
590
591; Don't use SVE for 128-bit vectors.
592define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; 128-bit vector fits NEON: expect a single NEON smin, no SVE.
593; CHECK-LABEL: smin_v4i32:
594; CHECK: smin v0.4s, v0.4s, v1.4s
595; CHECK-NEXT: ret
596  %res = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
597  ret <4 x i32> %res
598}
599
600define void @smin_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; 256-bit case: predicated SVE smin over 8 words, stored back over %a.
601; CHECK-LABEL: smin_v8i32:
602; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
603; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
604; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
605; CHECK-NEXT: smin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
606; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
607; CHECK-NEXT: ret
608  %op1 = load <8 x i32>, <8 x i32>* %a
609  %op2 = load <8 x i32>, <8 x i32>* %b
610  %res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
611  store <8 x i32> %res, <8 x i32>* %a
612  ret void
613}
614
615define void @smin_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; Single-vector path when registers are >=512 bits; 256-bit SVE splits into
; two vl8 halves addressed via an add of #32 bytes (checked under VBITS_EQ_256).
616; CHECK-LABEL: smin_v16i32:
617; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
618; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
619; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
620; VBITS_GE_512-NEXT: smin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
621; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
622; VBITS_GE_512-NEXT: ret
623
624; Ensure sensible type legalisation.
625; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
626; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
627; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
628; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
629; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
630; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
631; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
632; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
633; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
634; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
635; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
636; VBITS_EQ_256-NEXT: ret
637  %op1 = load <16 x i32>, <16 x i32>* %a
638  %op2 = load <16 x i32>, <16 x i32>* %b
639  %res = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
640  store <16 x i32> %res, <16 x i32>* %a
641  ret void
642}
643
644define void @smin_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; Checked only when SVE registers are >=1024 bits (vl32 word predicate).
645; CHECK-LABEL: smin_v32i32:
646; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
647; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
648; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
649; VBITS_GE_1024-NEXT: smin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
650; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
651; VBITS_GE_1024-NEXT: ret
652  %op1 = load <32 x i32>, <32 x i32>* %a
653  %op2 = load <32 x i32>, <32 x i32>* %b
654  %res = call <32 x i32> @llvm.smin.v32i32(<32 x i32> %op1, <32 x i32> %op2)
655  store <32 x i32> %res, <32 x i32>* %a
656  ret void
657}
658
659define void @smin_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; Checked only at the 2048-bit maximum (vl64 word predicate).
660; CHECK-LABEL: smin_v64i32:
661; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
662; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
663; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
664; VBITS_GE_2048-NEXT: smin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
665; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
666; VBITS_GE_2048-NEXT: ret
667  %op1 = load <64 x i32>, <64 x i32>* %a
668  %op2 = load <64 x i32>, <64 x i32>* %b
669  %res = call <64 x i32> @llvm.smin.v64i32(<64 x i32> %op1, <64 x i32> %op2)
670  store <64 x i32> %res, <64 x i32>* %a
671  ret void
672}
673
674; Vector i64 min are not legal for NEON so use SVE when available.
675define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; No NEON i64 smin exists, so even this NEON-sized vector uses SVE (vl1).
676; CHECK-LABEL: smin_v1i64:
677; CHECK: ptrue [[PG:p[0-9]+]].d, vl1
678; CHECK-NEXT: smin z0.d, [[PG]]/m, z0.d, z1.d
679; CHECK-NEXT: ret
680  %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
681  ret <1 x i64> %res
682}
683
684; Vector i64 min are not legal for NEON so use SVE when available.
685define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; No NEON i64 smin exists, so even this NEON-sized vector uses SVE (vl2).
686; CHECK-LABEL: smin_v2i64:
687; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
688; CHECK-NEXT: smin z0.d, [[PG]]/m, z0.d, z1.d
689; CHECK-NEXT: ret
690  %res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
691  ret <2 x i64> %res
692}
693
694define void @smin_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; 256-bit case: predicated SVE smin over 4 doublewords, stored back over %a.
695; CHECK-LABEL: smin_v4i64:
696; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
697; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
698; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
699; CHECK-NEXT: smin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
700; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
701; CHECK-NEXT: ret
702  %op1 = load <4 x i64>, <4 x i64>* %a
703  %op2 = load <4 x i64>, <4 x i64>* %b
704  %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
705  store <4 x i64> %res, <4 x i64>* %a
706  ret void
707}
708
define void @smin_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; At VL>=512 one predicated smin; at VL=256 the op splits into low/high halves.
; CHECK-LABEL: smin_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: smin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %res = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}
737
define void @smin_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; 16 x i64 fits a single 1024-bit SVE register (vl16), so one predicated smin.
; CHECK-LABEL: smin_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: smin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %res = call <16 x i64> @llvm.smin.v16i64(<16 x i64> %op1, <16 x i64> %op2)
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}
752
define void @smin_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; 32 x i64 fits a single 2048-bit SVE register (vl32), so one predicated smin.
; CHECK-LABEL: smin_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: smin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %res = call <32 x i64> @llvm.smin.v32i64(<32 x i64> %op1, <32 x i64> %op2)
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}
767
768;
769; UMAX
770;
771
772; Don't use SVE for 64-bit vectors.
define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; Plain NEON umax expected; CHECK-NEXT tightens the ret match to agree with
; the sibling tests (e.g. umax_v4i16).
; CHECK-LABEL: umax_v8i8:
; CHECK: umax v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %res = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
  ret <8 x i8> %res
}
780
781; Don't use SVE for 128-bit vectors.
define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; Plain NEON umax expected; CHECK-NEXT tightens the ret match to agree with
; the sibling tests (e.g. umax_v8i16).
; CHECK-LABEL: umax_v16i8:
; CHECK: umax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %res = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
  ret <16 x i8> %res
}
789
define void @umax_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; 32 x i8 fits a single 256-bit SVE register (vl32), so one predicated umax.
; CHECK-LABEL: umax_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}
804
define void @umax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; At VL>=512 one predicated umax; at VL=256 the op splits into two halves
; addressed with a register offset (#32 bytes).
; CHECK-LABEL: umax_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
;
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %res = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}
832
define void @umax_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; 128 x i8 fits a single 1024-bit SVE register (vl128), so one predicated umax.
; CHECK-LABEL: umax_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <128 x i8>, <128 x i8>* %a
  %op2 = load <128 x i8>, <128 x i8>* %b
  %res = call <128 x i8> @llvm.umax.v128i8(<128 x i8> %op1, <128 x i8> %op2)
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}
847
define void @umax_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; 256 x i8 fits a single 2048-bit SVE register (vl256), so one predicated umax.
; CHECK-LABEL: umax_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <256 x i8>, <256 x i8>* %a
  %op2 = load <256 x i8>, <256 x i8>* %b
  %res = call <256 x i8> @llvm.umax.v256i8(<256 x i8> %op1, <256 x i8> %op2)
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}
862
863; Don't use SVE for 64-bit vectors.
define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; Plain NEON umax expected; no SVE predication.
; CHECK-LABEL: umax_v4i16:
; CHECK: umax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %res = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
  ret <4 x i16> %res
}
871
872; Don't use SVE for 128-bit vectors.
define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; Plain NEON umax expected; no SVE predication.
; CHECK-LABEL: umax_v8i16:
; CHECK: umax v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %res = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
  ret <8 x i16> %res
}
880
define void @umax_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; 16 x i16 fits a single 256-bit SVE register (vl16), so one predicated umax.
; CHECK-LABEL: umax_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}
895
define void @umax_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; At VL>=512 one predicated umax; at VL=256 the op splits into low/high halves.
; CHECK-LABEL: umax_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %res = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}
924
define void @umax_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; 64 x i16 fits a single 1024-bit SVE register (vl64), so one predicated umax.
; CHECK-LABEL: umax_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x i16>, <64 x i16>* %a
  %op2 = load <64 x i16>, <64 x i16>* %b
  %res = call <64 x i16> @llvm.umax.v64i16(<64 x i16> %op1, <64 x i16> %op2)
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}
939
define void @umax_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; 128 x i16 fits a single 2048-bit SVE register (vl128), so one predicated umax.
; CHECK-LABEL: umax_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x i16>, <128 x i16>* %a
  %op2 = load <128 x i16>, <128 x i16>* %b
  %res = call <128 x i16> @llvm.umax.v128i16(<128 x i16> %op1, <128 x i16> %op2)
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}
954
955; Don't use SVE for 64-bit vectors.
define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; Plain NEON umax expected; no SVE predication.
; CHECK-LABEL: umax_v2i32:
; CHECK: umax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
  ret <2 x i32> %res
}
963
964; Don't use SVE for 128-bit vectors.
define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; Plain NEON umax expected; no SVE predication.
; CHECK-LABEL: umax_v4i32:
; CHECK: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %res = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
  ret <4 x i32> %res
}
972
define void @umax_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; 8 x i32 fits a single 256-bit SVE register (vl8), so one predicated umax.
; CHECK-LABEL: umax_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}
987
define void @umax_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; At VL>=512 one predicated umax; at VL=256 the op splits into low/high halves.
; CHECK-LABEL: umax_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %res = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}
1016
define void @umax_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; 32 x i32 fits a single 1024-bit SVE register (vl32), so one predicated umax.
; CHECK-LABEL: umax_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x i32>, <32 x i32>* %a
  %op2 = load <32 x i32>, <32 x i32>* %b
  %res = call <32 x i32> @llvm.umax.v32i32(<32 x i32> %op1, <32 x i32> %op2)
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}
1031
define void @umax_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; 64 x i32 fits a single 2048-bit SVE register (vl64), so one predicated umax.
; CHECK-LABEL: umax_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x i32>, <64 x i32>* %a
  %op2 = load <64 x i32>, <64 x i32>* %b
  %res = call <64 x i32> @llvm.umax.v64i32(<64 x i32> %op1, <64 x i32> %op2)
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}
1046
1047; Vector i64 max are not legal for NEON so use SVE when available.
define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; NEON-sized operand handled in-place (z0 aliases v0) via a vl1 predicate.
; CHECK-LABEL: umax_v1i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl1
; CHECK-NEXT: umax z0.d, [[PG]]/m, z0.d, z1.d
; CHECK-NEXT: ret
  %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
  ret <1 x i64> %res
}
1056
1057; Vector i64 max are not legal for NEON so use SVE when available.
define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; NEON-sized operand handled in-place (z0 aliases v0) via a vl2 predicate.
; CHECK-LABEL: umax_v2i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
; CHECK-NEXT: umax z0.d, [[PG]]/m, z0.d, z1.d
; CHECK-NEXT: ret
  %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
  ret <2 x i64> %res
}
1066
define void @umax_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; 4 x i64 fits a single 256-bit SVE register (vl4), so one predicated umax.
; CHECK-LABEL: umax_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}
1081
define void @umax_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; At VL>=512 one predicated umax; at VL=256 the op splits into low/high halves.
; CHECK-LABEL: umax_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %res = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}
1110
define void @umax_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; 16 x i64 fits a single 1024-bit SVE register (vl16), so one predicated umax.
; CHECK-LABEL: umax_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %res = call <16 x i64> @llvm.umax.v16i64(<16 x i64> %op1, <16 x i64> %op2)
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}
1125
define void @umax_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; 32 x i64 fits a single 2048-bit SVE register (vl32), so one predicated umax.
; CHECK-LABEL: umax_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %res = call <32 x i64> @llvm.umax.v32i64(<32 x i64> %op1, <32 x i64> %op2)
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}
1140
1141;
1142; UMIN
1143;
1144
1145; Don't use SVE for 64-bit vectors.
define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; Plain NEON umin expected; no SVE predication.
; CHECK-LABEL: umin_v8i8:
; CHECK: umin v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %res = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
  ret <8 x i8> %res
}
1153
1154; Don't use SVE for 128-bit vectors.
define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; Plain NEON umin expected; CHECK-NEXT tightens the ret match to agree with
; the sibling tests (e.g. umin_v8i8).
; CHECK-LABEL: umin_v16i8:
; CHECK: umin v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %res = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
  ret <16 x i8> %res
}
1162
define void @umin_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; 32 x i8 fits a single 256-bit SVE register (vl32), so one predicated umin.
; CHECK-LABEL: umin_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}
1177
define void @umin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; At VL>=512 one predicated umin; at VL=256 the op splits into two halves.
; The trailing VBITS_EQ_256-NEXT ret matches the sibling tests (cf. umax_v64i8).
; CHECK-LABEL: umin_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
;
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}
1204
define void @umin_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; 128 x i8 fits a single 1024-bit SVE register (vl128), so one predicated umin.
; CHECK-LABEL: umin_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <128 x i8>, <128 x i8>* %a
  %op2 = load <128 x i8>, <128 x i8>* %b
  %res = call <128 x i8> @llvm.umin.v128i8(<128 x i8> %op1, <128 x i8> %op2)
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}
1219
define void @umin_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; 256 x i8 fits a single 2048-bit SVE register (vl256), so one predicated umin.
; CHECK-LABEL: umin_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <256 x i8>, <256 x i8>* %a
  %op2 = load <256 x i8>, <256 x i8>* %b
  %res = call <256 x i8> @llvm.umin.v256i8(<256 x i8> %op1, <256 x i8> %op2)
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}
1234
1235; Don't use SVE for 64-bit vectors.
define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; Plain NEON umin expected; no SVE predication.
; CHECK-LABEL: umin_v4i16:
; CHECK: umin v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %res = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
  ret <4 x i16> %res
}
1243
1244; Don't use SVE for 128-bit vectors.
define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; Plain NEON umin expected; no SVE predication.
; CHECK-LABEL: umin_v8i16:
; CHECK: umin v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %res = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
  ret <8 x i16> %res
}
1252
define void @umin_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; 16 x i16 fits a single 256-bit SVE register (vl16), so one predicated umin.
; CHECK-LABEL: umin_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}
1267
define void @umin_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; At VL>=512 one predicated umin; at VL=256 the op splits into low/high halves.
; CHECK-LABEL: umin_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %res = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}
1296
define void @umin_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; 64 x i16 fits a single 1024-bit SVE register (vl64), so one predicated umin.
; CHECK-LABEL: umin_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x i16>, <64 x i16>* %a
  %op2 = load <64 x i16>, <64 x i16>* %b
  %res = call <64 x i16> @llvm.umin.v64i16(<64 x i16> %op1, <64 x i16> %op2)
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}
1311
define void @umin_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; 128 x i16 fits a single 2048-bit SVE register (vl128), so one predicated umin.
; CHECK-LABEL: umin_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x i16>, <128 x i16>* %a
  %op2 = load <128 x i16>, <128 x i16>* %b
  %res = call <128 x i16> @llvm.umin.v128i16(<128 x i16> %op1, <128 x i16> %op2)
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}
1326
1327; Don't use SVE for 64-bit vectors.
define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; Plain NEON umin expected; no SVE predication.
; CHECK-LABEL: umin_v2i32:
; CHECK: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
  ret <2 x i32> %res
}
1335
1336; Don't use SVE for 128-bit vectors.
define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; Plain NEON umin expected; no SVE predication.
; CHECK-LABEL: umin_v4i32:
; CHECK: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %res = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
  ret <4 x i32> %res
}
1344
define void @umin_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; 8 x i32 fits a single 256-bit SVE register (vl8), so one predicated umin.
; CHECK-LABEL: umin_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}
1359
define void @umin_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; At VL>=512 one predicated umin; at VL=256 the op splits into low/high halves.
; CHECK-LABEL: umin_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %res = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}
1388
define void @umin_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; 32 x i32 fits a single 1024-bit SVE register (vl32), so one predicated umin.
; CHECK-LABEL: umin_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x i32>, <32 x i32>* %a
  %op2 = load <32 x i32>, <32 x i32>* %b
  %res = call <32 x i32> @llvm.umin.v32i32(<32 x i32> %op1, <32 x i32> %op2)
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}
1403
define void @umin_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; 64 x i32 fits a single 2048-bit SVE register (vl64), so one predicated umin.
; CHECK-LABEL: umin_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x i32>, <64 x i32>* %a
  %op2 = load <64 x i32>, <64 x i32>* %b
  %res = call <64 x i32> @llvm.umin.v64i32(<64 x i32> %op1, <64 x i32> %op2)
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}
1418
1419; Vector i64 min are not legal for NEON so use SVE when available.
1420define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
1421; CHECK-LABEL: umin_v1i64:
1422; CHECK: ptrue [[PG:p[0-9]+]].d, vl1
1423; CHECK-NEXT: umin z0.d, [[PG]]/m, z0.d, z1.d
1424; CHECK-NEXT: ret
1425  %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
1426  ret <1 x i64> %res
1427}
1428
1429; Vector i64 min are not legal for NEON so use SVE when available.
1430define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
1431; CHECK-LABEL: umin_v2i64:
1432; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
1433; CHECK-NEXT: umin z0.d, [[PG]]/m, z0.d, z1.d
1434; CHECK-NEXT: ret
1435  %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
1436  ret <2 x i64> %res
1437}
1438
; <4 x i64> is 256 bits, within every configured minimum, so all run
; lines share one expectation: load both operands, umin, store to %a.
define void @umin_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: umin_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}
1453
; <8 x i64> is 512 bits: a single umin when registers are >= 512 bits,
; otherwise (256-bit case) the operation is split into low/high halves
; at a 32-byte offset from each pointer.
define void @umin_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: umin_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %res = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}
1482
; <16 x i64> is 1024 bits, so a single predicated umin is only expected
; when the minimum SVE register size is at least 1024 bits.
define void @umin_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: umin_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %res = call <16 x i64> @llvm.umin.v16i64(<16 x i64> %op1, <16 x i64> %op2)
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}
1497
; <32 x i64> is 2048 bits, so a single predicated umin is only expected
; for the largest configured register size.
define void @umin_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: umin_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %res = call <32 x i64> @llvm.umin.v32i64(<32 x i64> %op1, <32 x i64> %op2)
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}
1512
1513attributes #0 = { "target-features"="+sve" }
1514
1515declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
1516declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
1517declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
1518declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>)
1519declare <128 x i8> @llvm.smin.v128i8(<128 x i8>, <128 x i8>)
1520declare <256 x i8> @llvm.smin.v256i8(<256 x i8>, <256 x i8>)
1521declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
1522declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
1523declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
1524declare <32 x i16> @llvm.smin.v32i16(<32 x i16>, <32 x i16>)
1525declare <64 x i16> @llvm.smin.v64i16(<64 x i16>, <64 x i16>)
1526declare <128 x i16> @llvm.smin.v128i16(<128 x i16>, <128 x i16>)
1527declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
1528declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
1529declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
1530declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
1531declare <32 x i32> @llvm.smin.v32i32(<32 x i32>, <32 x i32>)
1532declare <64 x i32> @llvm.smin.v64i32(<64 x i32>, <64 x i32>)
1533declare <1 x i64> @llvm.smin.v1i64(<1 x i64>, <1 x i64>)
1534declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
1535declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
1536declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>)
1537declare <16 x i64> @llvm.smin.v16i64(<16 x i64>, <16 x i64>)
1538declare <32 x i64> @llvm.smin.v32i64(<32 x i64>, <32 x i64>)
1539
1540declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
1541declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
1542declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
1543declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>)
1544declare <128 x i8> @llvm.smax.v128i8(<128 x i8>, <128 x i8>)
1545declare <256 x i8> @llvm.smax.v256i8(<256 x i8>, <256 x i8>)
1546declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
1547declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
1548declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
1549declare <32 x i16> @llvm.smax.v32i16(<32 x i16>, <32 x i16>)
1550declare <64 x i16> @llvm.smax.v64i16(<64 x i16>, <64 x i16>)
1551declare <128 x i16> @llvm.smax.v128i16(<128 x i16>, <128 x i16>)
1552declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
1553declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
1554declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
1555declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
1556declare <32 x i32> @llvm.smax.v32i32(<32 x i32>, <32 x i32>)
1557declare <64 x i32> @llvm.smax.v64i32(<64 x i32>, <64 x i32>)
1558declare <1 x i64> @llvm.smax.v1i64(<1 x i64>, <1 x i64>)
1559declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
1560declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
1561declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>)
1562declare <16 x i64> @llvm.smax.v16i64(<16 x i64>, <16 x i64>)
1563declare <32 x i64> @llvm.smax.v32i64(<32 x i64>, <32 x i64>)
1564
1565declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
1566declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
1567declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
1568declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>)
1569declare <128 x i8> @llvm.umin.v128i8(<128 x i8>, <128 x i8>)
1570declare <256 x i8> @llvm.umin.v256i8(<256 x i8>, <256 x i8>)
1571declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
1572declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
1573declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
1574declare <32 x i16> @llvm.umin.v32i16(<32 x i16>, <32 x i16>)
1575declare <64 x i16> @llvm.umin.v64i16(<64 x i16>, <64 x i16>)
1576declare <128 x i16> @llvm.umin.v128i16(<128 x i16>, <128 x i16>)
1577declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
1578declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
1579declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
1580declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
1581declare <32 x i32> @llvm.umin.v32i32(<32 x i32>, <32 x i32>)
1582declare <64 x i32> @llvm.umin.v64i32(<64 x i32>, <64 x i32>)
1583declare <1 x i64> @llvm.umin.v1i64(<1 x i64>, <1 x i64>)
1584declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
1585declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
1586declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>)
1587declare <16 x i64> @llvm.umin.v16i64(<16 x i64>, <16 x i64>)
1588declare <32 x i64> @llvm.umin.v32i64(<32 x i64>, <32 x i64>)
1589
1590declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
1591declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
1592declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
1593declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>)
1594declare <128 x i8> @llvm.umax.v128i8(<128 x i8>, <128 x i8>)
1595declare <256 x i8> @llvm.umax.v256i8(<256 x i8>, <256 x i8>)
1596declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
1597declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
1598declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
1599declare <32 x i16> @llvm.umax.v32i16(<32 x i16>, <32 x i16>)
1600declare <64 x i16> @llvm.umax.v64i16(<64 x i16>, <64 x i16>)
1601declare <128 x i16> @llvm.umax.v128i16(<128 x i16>, <128 x i16>)
1602declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>)
1603declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
1604declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
1605declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
1606declare <32 x i32> @llvm.umax.v32i32(<32 x i32>, <32 x i32>)
1607declare <64 x i32> @llvm.umax.v64i32(<64 x i32>, <64 x i32>)
1608declare <1 x i64> @llvm.umax.v1i64(<1 x i64>, <1 x i64>)
1609declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
1610declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
1611declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>)
1612declare <16 x i64> @llvm.umax.v16i64(<16 x i64>, <16 x i64>)
1613declare <32 x i64> @llvm.umax.v32i64(<32 x i64>, <32 x i64>)
1614
1615