; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: z{0-9}

;
; ICMP EQ
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; CHECK-LABEL: icmp_eq_v8i8:
; CHECK: cmeq v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %cmp = icmp eq <8 x i8> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %sext
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; CHECK-LABEL: icmp_eq_v16i8:
; CHECK: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %cmp = icmp eq <16 x i8> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %sext
}

define void @icmp_eq_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; CHECK-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %cmp = icmp eq <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, <32 x i8>* %a
  ret void
}

define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[OFF_HI:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_HI]]]
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].b, [[PG]]/z, [[OP1_LO]].b, [[OP2_LO]].b
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].b, [[PG]]/z, [[OP1_HI]].b, [[OP2_HI]].b
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].b, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].b, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1b { [[SEXT_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[SEXT_HI]].b }, [[PG]], [x0, x[[OFF_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %cmp = icmp eq <64 x i8> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i8>
  store <64 x i8> %sext, <64 x i8>* %a
  ret void
}

define void @icmp_eq_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <128 x i8>, <128 x i8>* %a
  %op2 = load <128 x i8>, <128 x i8>* %b
  %cmp = icmp eq <128 x i8> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i8>
  store <128 x i8> %sext, <128 x i8>* %a
  ret void
}

define void @icmp_eq_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <256 x i8>, <256 x i8>* %a
  %op2 = load <256 x i8>, <256 x i8>* %b
  %cmp = icmp eq <256 x i8> %op1, %op2
  %sext = sext <256 x i1> %cmp to <256 x i8>
  store <256 x i8> %sext, <256 x i8>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; CHECK-LABEL: icmp_eq_v4i16:
; CHECK: cmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %cmp = icmp eq <4 x i16> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %sext
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; CHECK-LABEL: icmp_eq_v8i16:
; CHECK: cmeq v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %cmp = icmp eq <8 x i16> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %sext
}

define void @icmp_eq_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %cmp = icmp eq <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, <16 x i16>* %a
  ret void
}

define void @icmp_eq_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %cmp = icmp eq <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, <32 x i16>* %a
  ret void
}

define void @icmp_eq_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x i16>, <64 x i16>* %a
  %op2 = load <64 x i16>, <64 x i16>* %b
  %cmp = icmp eq <64 x i16> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i16>
  store <64 x i16> %sext, <64 x i16>* %a
  ret void
}

define void @icmp_eq_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x i16>, <128 x i16>* %a
  %op2 = load <128 x i16>, <128 x i16>* %b
  %cmp = icmp eq <128 x i16> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i16>
  store <128 x i16> %sext, <128 x i16>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; CHECK-LABEL: icmp_eq_v2i32:
; CHECK: cmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %cmp = icmp eq <2 x i32> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %sext
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; CHECK-LABEL: icmp_eq_v4i32:
; CHECK: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp = icmp eq <4 x i32> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %sext
}

define void @icmp_eq_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %cmp = icmp eq <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, <8 x i32>* %a
  ret void
}

define void @icmp_eq_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %cmp = icmp eq <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, <16 x i32>* %a
  ret void
}

define void @icmp_eq_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x i32>, <32 x i32>* %a
  %op2 = load <32 x i32>, <32 x i32>* %b
  %cmp = icmp eq <32 x i32> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i32>
  store <32 x i32> %sext, <32 x i32>* %a
  ret void
}

define void @icmp_eq_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x i32>, <64 x i32>* %a
  %op2 = load <64 x i32>, <64 x i32>* %b
  %cmp = icmp eq <64 x i32> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i32>
  store <64 x i32> %sext, <64 x i32>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; CHECK-LABEL: icmp_eq_v1i64:
; CHECK: cmeq d0, d0, d1
; CHECK-NEXT: ret
  %cmp = icmp eq <1 x i64> %op1, %op2
  %sext = sext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %sext
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; CHECK-LABEL: icmp_eq_v2i64:
; CHECK: cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %cmp = icmp eq <2 x i64> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %sext
}

define void @icmp_eq_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %cmp = icmp eq <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, <4 x i64>* %a
  ret void
}

define void @icmp_eq_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %cmp = icmp eq <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, <8 x i64>* %a
  ret void
}

define void @icmp_eq_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %cmp = icmp eq <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, <16 x i64>* %a
  ret void
}

define void @icmp_eq_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %cmp = icmp eq <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, <32 x i64>* %a
  ret void
}


;
; ICMP NE
;

define void @icmp_ne_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: icmp_ne_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpne [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; CHECK-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %cmp = icmp ne <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, <32 x i8>* %a
  ret void
}

;
; ICMP SGE
;

define void @icmp_sge_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: icmp_sge_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %cmp = icmp sge <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, <32 x i16>* %a
  ret void
}

;
; ICMP SGT
;

define void @icmp_sgt_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: icmp_sgt_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %cmp = icmp sgt <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, <16 x i16>* %a
  ret void
}

;
; ICMP SLE
;

define void @icmp_sle_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: icmp_sle_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpge [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP2]].s, [[OP1]].s
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %cmp = icmp sle <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, <16 x i32>* %a
  ret void
}

;
; ICMP SLT
;

define void @icmp_slt_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: icmp_slt_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpgt [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP2]].s, [[OP1]].s
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %cmp = icmp slt <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, <8 x i32>* %a
  ret void
}

;
; ICMP UGE
;

define void @icmp_uge_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: icmp_uge_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmphs [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %cmp = icmp uge <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, <8 x i64>* %a
  ret void
}

;
; ICMP UGT
;

define void @icmp_ugt_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: icmp_ugt_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: cmphi [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %cmp = icmp ugt <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, <4 x i64>* %a
  ret void
}

;
; ICMP ULE
;

define void @icmp_ule_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: icmp_ule_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmphs [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP2]].d, [[OP1]].d
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %cmp = icmp ule <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, <16 x i64>* %a
  ret void
}

;
; ICMP ULT
;

define void @icmp_ult_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: icmp_ult_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmphi [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP2]].d, [[OP1]].d
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %cmp = icmp ult <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, <32 x i64>* %a
  ret void
}
attributes #0 = { "target-features"="+sve" }