; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048

; The RUN lines above select check prefixes by the minimum SVE register width:
; NO_SVE when SVE registers are only 128 bits (NEON-sized), VBITS_EQ_256 for
; exactly 256 bits, and each VBITS_GE_* prefix once the minimum reaches that
; width. Vectors wider than the guaranteed register size exercise type
; legalisation (splitting) under the VBITS_EQ_256 checks.

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: z{0-9}

;
; ICMP EQ
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
; CHECK-LABEL: icmp_eq_v8i8:
; CHECK: cmeq v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %cmp = icmp eq <8 x i8> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %sext
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; CHECK-LABEL: icmp_eq_v16i8:
; CHECK: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %cmp = icmp eq <16 x i8> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %sext
}

define void @icmp_eq_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; CHECK-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %cmp = icmp eq <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, <32 x i8>* %a
  ret void
}

define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[OFF_HI:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_HI]]]
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].b, [[PG]]/z, [[OP1_LO]].b, [[OP2_LO]].b
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].b, [[PG]]/z, [[OP1_HI]].b, [[OP2_HI]].b
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].b, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].b, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1b { [[SEXT_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[SEXT_HI]].b }, [[PG]], [x0, x[[OFF_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %cmp = icmp eq <64 x i8> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i8>
  store <64 x i8> %sext, <64 x i8>* %a
  ret void
}

define void @icmp_eq_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <128 x i8>, <128 x i8>* %a
  %op2 = load <128 x i8>, <128 x i8>* %b
  %cmp = icmp eq <128 x i8> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i8>
  store <128 x i8> %sext, <128 x i8>* %a
  ret void
}

define void @icmp_eq_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
; CHECK-LABEL: icmp_eq_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <256 x i8>, <256 x i8>* %a
  %op2 = load <256 x i8>, <256 x i8>* %b
  %cmp = icmp eq <256 x i8> %op1, %op2
  %sext = sext <256 x i1> %cmp to <256 x i8>
  store <256 x i8> %sext, <256 x i8>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
; CHECK-LABEL: icmp_eq_v4i16:
; CHECK: cmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %cmp = icmp eq <4 x i16> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %sext
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
; CHECK-LABEL: icmp_eq_v8i16:
; CHECK: cmeq v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %cmp = icmp eq <8 x i16> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %sext
}

define void @icmp_eq_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %cmp = icmp eq <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, <16 x i16>* %a
  ret void
}

define void @icmp_eq_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %cmp = icmp eq <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, <32 x i16>* %a
  ret void
}

define void @icmp_eq_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x i16>, <64 x i16>* %a
  %op2 = load <64 x i16>, <64 x i16>* %b
  %cmp = icmp eq <64 x i16> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i16>
  store <64 x i16> %sext, <64 x i16>* %a
  ret void
}

define void @icmp_eq_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
; CHECK-LABEL: icmp_eq_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x i16>, <128 x i16>* %a
  %op2 = load <128 x i16>, <128 x i16>* %b
  %cmp = icmp eq <128 x i16> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i16>
  store <128 x i16> %sext, <128 x i16>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
; CHECK-LABEL: icmp_eq_v2i32:
; CHECK: cmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %cmp = icmp eq <2 x i32> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %sext
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
; CHECK-LABEL: icmp_eq_v4i32:
; CHECK: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp = icmp eq <4 x i32> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %sext
}

define void @icmp_eq_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %cmp = icmp eq <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, <8 x i32>* %a
  ret void
}

define void @icmp_eq_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %cmp = icmp eq <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, <16 x i32>* %a
  ret void
}

define void @icmp_eq_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x i32>, <32 x i32>* %a
  %op2 = load <32 x i32>, <32 x i32>* %b
  %cmp = icmp eq <32 x i32> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i32>
  store <32 x i32> %sext, <32 x i32>* %a
  ret void
}

define void @icmp_eq_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
; CHECK-LABEL: icmp_eq_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x i32>, <64 x i32>* %a
  %op2 = load <64 x i32>, <64 x i32>* %b
  %cmp = icmp eq <64 x i32> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i32>
  store <64 x i32> %sext, <64 x i32>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
; CHECK-LABEL: icmp_eq_v1i64:
; CHECK: cmeq d0, d0, d1
; CHECK-NEXT: ret
  %cmp = icmp eq <1 x i64> %op1, %op2
  %sext = sext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %sext
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
; CHECK-LABEL: icmp_eq_v2i64:
; CHECK: cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %cmp = icmp eq <2 x i64> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %sext
}

define void @icmp_eq_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %cmp = icmp eq <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, <4 x i64>* %a
  ret void
}

define void @icmp_eq_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[A_HI]]]
; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %cmp = icmp eq <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, <8 x i64>* %a
  ret void
}

define void @icmp_eq_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %cmp = icmp eq <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, <16 x i64>* %a
  ret void
}

define void @icmp_eq_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: icmp_eq_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %cmp = icmp eq <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, <32 x i64>* %a
  ret void
}

; The remaining predicates are each spot-checked with a single vector width.

;
; ICMP NE
;

define void @icmp_ne_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: icmp_ne_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpne [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
; CHECK-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <32 x i8>, <32 x i8>* %a
  %op2 = load <32 x i8>, <32 x i8>* %b
  %cmp = icmp ne <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, <32 x i8>* %a
  ret void
}

;
; ICMP SGE
;

define void @icmp_sge_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: icmp_sge_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
  %cmp = icmp sge <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, <32 x i16>* %a
  ret void
}

;
; ICMP SGT
;

define void @icmp_sgt_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: icmp_sgt_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, <16 x i16>* %a
  %op2 = load <16 x i16>, <16 x i16>* %b
  %cmp = icmp sgt <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, <16 x i16>* %a
  ret void
}

;
; ICMP SLE
;

; Note: the checks below expect sle to lower as cmpge with the operands
; commuted (OP2 before OP1), rather than a dedicated "le" compare.
define void @icmp_sle_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: icmp_sle_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmpge [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP2]].s, [[OP1]].s
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
  %cmp = icmp sle <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, <16 x i32>* %a
  ret void
}

;
; ICMP SLT
;

; Note: slt is checked as cmpgt with commuted operands.
define void @icmp_slt_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: icmp_slt_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: cmpgt [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP2]].s, [[OP1]].s
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %cmp = icmp slt <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, <8 x i32>* %a
  ret void
}

;
; ICMP UGE
;

define void @icmp_uge_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: icmp_uge_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: cmphs [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
  %cmp = icmp uge <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, <8 x i64>* %a
  ret void
}

;
; ICMP UGT
;

define void @icmp_ugt_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: icmp_ugt_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: cmphi [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, <4 x i64>* %a
  %op2 = load <4 x i64>, <4 x i64>* %b
  %cmp = icmp ugt <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, <4 x i64>* %a
  ret void
}

;
; ICMP ULE
;

; Note: ule is checked as cmphs with commuted operands.
define void @icmp_ule_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: icmp_ule_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: cmphs [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP2]].d, [[OP1]].d
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x i64>, <16 x i64>* %a
  %op2 = load <16 x i64>, <16 x i64>* %b
  %cmp = icmp ule <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, <16 x i64>* %a
  ret void
}

;
; ICMP ULT
;

; Note: ult is checked as cmphi with commuted operands.
define void @icmp_ult_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: icmp_ult_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: cmphi [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP2]].d, [[OP1]].d
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x i64>, <32 x i64>* %a
  %op2 = load <32 x i64>, <32 x i64>* %b
  %cmp = icmp ult <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, <32 x i64>* %a
  ret void
}

attributes #0 = { "target-features"="+sve" }