; Each of the following lit command lines compiles this file with a different
; minimum SVE register width and enables the check prefixes valid at that width.
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: ptrue

;
; DUP (integer)
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @splat_v8i8(i8 %a) #0 {
; CHECK-LABEL: splat_v8i8:
; CHECK: dup v0.8b, w0
; CHECK-NEXT: ret
  %insert = insertelement <8 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @splat_v16i8(i8 %a) #0 {
; CHECK-LABEL: splat_v16i8:
; CHECK: dup v0.16b, w0
; CHECK-NEXT: ret
  %insert = insertelement <16 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}

; 256-bit splat: a single predicated SVE store is expected at every checked width.
define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: splat_v32i8:
; CHECK-DAG: mov [[RES:z[0-9]+]].b, w0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <32 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
  store <32 x i8> %splat, <32 x i8>* %b
  ret void
}

; 512-bit splat: one store with registers of at least 512 bits, otherwise the
; value is split into two 256-bit stores (the second at byte offset 32).
define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 {
; CHECK-LABEL: splat_v64i8:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32
; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[OFFSET_HI]]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <64 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
  store <64 x i8> %splat, <64 x i8>* %b
  ret void
}

; 1024-bit splat: only checked when registers are at least 1024 bits wide.
define void @splat_v128i8(i8 %a, <128 x i8>* %b) #0 {
; CHECK-LABEL: splat_v128i8:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <128 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
  store <128 x i8> %splat, <128 x i8>* %b
  ret void
}

; 2048-bit splat: only checked when registers are 2048 bits wide.
define void @splat_v256i8(i8 %a, <256 x i8>* %b) #0 {
; CHECK-LABEL: splat_v256i8:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].b, w0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <256 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
  store <256 x i8> %splat, <256 x i8>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @splat_v4i16(i16 %a) #0 {
; CHECK-LABEL: splat_v4i16:
; CHECK: dup v0.4h, w0
; CHECK-NEXT: ret
  %insert = insertelement <4 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @splat_v8i16(i16 %a) #0 {
; CHECK-LABEL: splat_v8i16:
; CHECK: dup v0.8h, w0
; CHECK-NEXT: ret
  %insert = insertelement <8 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; 256-bit splat: a single predicated SVE store is expected at every checked width.
define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: splat_v16i16:
; CHECK-DAG: mov [[RES:z[0-9]+]].h, w0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <16 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
  store <16 x i16> %splat, <16 x i16>* %b
  ret void
}

; 512-bit splat: one store with registers of at least 512 bits, otherwise the
; value is split into two 256-bit stores (the second at %b + 32 bytes).
define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 {
; CHECK-LABEL: splat_v32i16:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <32 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
  store <32 x i16> %splat, <32 x i16>* %b
  ret void
}

; 1024-bit splat: only checked when registers are at least 1024 bits wide.
define void @splat_v64i16(i16 %a, <64 x i16>* %b) #0 {
; CHECK-LABEL: splat_v64i16:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <64 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
  store <64 x i16> %splat, <64 x i16>* %b
  ret void
}

; 2048-bit splat: only checked when registers are 2048 bits wide.
define void @splat_v128i16(i16 %a, <128 x i16>* %b) #0 {
; CHECK-LABEL: splat_v128i16:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, w0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <128 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
  store <128 x i16> %splat, <128 x i16>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @splat_v2i32(i32 %a) #0 {
; CHECK-LABEL: splat_v2i32:
; CHECK: dup v0.2s, w0
; CHECK-NEXT: ret
  %insert = insertelement <2 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @splat_v4i32(i32 %a) #0 {
; CHECK-LABEL: splat_v4i32:
; CHECK: dup v0.4s, w0
; CHECK-NEXT: ret
  %insert = insertelement <4 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

; 256-bit splat: a single predicated SVE store is expected at every checked width.
define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: splat_v8i32:
; CHECK-DAG: mov [[RES:z[0-9]+]].s, w0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <8 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
  store <8 x i32> %splat, <8 x i32>* %b
  ret void
}

; 512-bit splat: one store with registers of at least 512 bits, otherwise the
; value is split into two 256-bit stores (the second at %b + 32 bytes).
define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 {
; CHECK-LABEL: splat_v16i32:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <16 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %splat, <16 x i32>* %b
  ret void
}

; 1024-bit splat: only checked when registers are at least 1024 bits wide.
define void @splat_v32i32(i32 %a, <32 x i32>* %b) #0 {
; CHECK-LABEL: splat_v32i32:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <32 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
  store <32 x i32> %splat, <32 x i32>* %b
  ret void
}

; 2048-bit splat: only checked when registers are 2048 bits wide.
define void @splat_v64i32(i32 %a, <64 x i32>* %b) #0 {
; CHECK-LABEL: splat_v64i32:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, w0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <64 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
  store <64 x i32> %splat, <64 x i32>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @splat_v1i64(i64 %a) #0 {
; A one-element splat is expected to be a plain move of x0 into d0.
; CHECK-LABEL: splat_v1i64:
; CHECK: fmov d0, x0
; CHECK-NEXT: ret
  %insert = insertelement <1 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
  ret <1 x i64> %splat
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @splat_v2i64(i64 %a) #0 {
; CHECK-LABEL: splat_v2i64:
; CHECK: dup v0.2d, x0
; CHECK-NEXT: ret
  %insert = insertelement <2 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

; 256-bit splat: a single predicated SVE store is expected at every checked width.
define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: splat_v4i64:
; CHECK-DAG: mov [[RES:z[0-9]+]].d, x0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; CHECK-NEXT: ret
  %insert = insertelement <4 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %splat, <4 x i64>* %b
  ret void
}

; 512-bit splat: one store with registers of at least 512 bits, otherwise the
; value is split into two 256-bit stores (the second at %b + 32 bytes).
define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 {
; CHECK-LABEL: splat_v8i64:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <8 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
  store <8 x i64> %splat, <8 x i64>* %b
  ret void
}

; 1024-bit splat: only checked when registers are at least 1024 bits wide.
define void @splat_v16i64(i64 %a, <16 x i64>* %b) #0 {
; CHECK-LABEL: splat_v16i64:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <16 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
  store <16 x i64> %splat, <16 x i64>* %b
  ret void
}

; 2048-bit splat: only checked when registers are 2048 bits wide.
define void @splat_v32i64(i64 %a, <32 x i64>* %b) #0 {
; CHECK-LABEL: splat_v32i64:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, x0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <32 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
  store <32 x i64> %splat, <32 x i64>* %b
  ret void
}

;
; DUP (floating-point)
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @splat_v4f16(half %a) #0 {
; CHECK-LABEL: splat_v4f16:
; CHECK: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
  %insert = insertelement <4 x half> undef, half %a, i64 0
  %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @splat_v8f16(half %a) #0 {
; CHECK-LABEL: splat_v8f16:
; CHECK: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
  %insert = insertelement <8 x half> undef, half %a, i64 0
  %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}

; 256-bit splat: a single predicated SVE store is expected at every checked width.
define void @splat_v16f16(half %a, <16 x half>* %b) #0 {
; CHECK-LABEL: splat_v16f16:
; CHECK-DAG: mov [[RES:z[0-9]+]].h, h0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %insert = insertelement <16 x half> undef, half %a, i64 0
  %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
  store <16 x half> %splat, <16 x half>* %b
  ret void
}

; 512-bit splat: one store with registers of at least 512 bits, otherwise the
; value is split into two 256-bit stores (the second at %b + 32 bytes).
define void @splat_v32f16(half %a, <32 x half>* %b) #0 {
; CHECK-LABEL: splat_v32f16:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <32 x half> undef, half %a, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
  store <32 x half> %splat, <32 x half>* %b
  ret void
}

; 1024-bit splat: only checked when registers are at least 1024 bits wide.
define void @splat_v64f16(half %a, <64 x half>* %b) #0 {
; CHECK-LABEL: splat_v64f16:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <64 x half> undef, half %a, i64 0
  %splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
  store <64 x half> %splat, <64 x half>* %b
  ret void
}

; 2048-bit splat: only checked when registers are 2048 bits wide.
define void @splat_v128f16(half %a, <128 x half>* %b) #0 {
; CHECK-LABEL: splat_v128f16:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, h0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <128 x half> undef, half %a, i64 0
  %splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
  store <128 x half> %splat, <128 x half>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
; CHECK-LABEL: splat_v2f32:
; CHECK: dup v0.2s, v0.s[0]
; CHECK-NEXT: ret
  %insert = insertelement <2 x float> undef, float %a, i64 0
  %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %insert = insertelement <4 x float> undef, float %a, i64 0
  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}

; 256-bit splat: a single predicated SVE store is expected at every checked width.
define void @splat_v8f32(float %a, <8 x float>* %b) #0 {
; CHECK-LABEL: splat_v8f32:
; CHECK-DAG: mov [[RES:z[0-9]+]].s, s0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %insert = insertelement <8 x float> undef, float %a, i64 0
  %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
  store <8 x float> %splat, <8 x float>* %b
  ret void
}

; 512-bit splat: one store with registers of at least 512 bits, otherwise the
; value is split into two 256-bit stores (the second at %b + 32 bytes).
define void @splat_v16f32(float %a, <16 x float>* %b) #0 {
; CHECK-LABEL: splat_v16f32:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <16 x float> undef, float %a, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
  store <16 x float> %splat, <16 x float>* %b
  ret void
}

; 1024-bit splat: only checked when registers are at least 1024 bits wide.
define void @splat_v32f32(float %a, <32 x float>* %b) #0 {
; CHECK-LABEL: splat_v32f32:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <32 x float> undef, float %a, i64 0
  %splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
  store <32 x float> %splat, <32 x float>* %b
  ret void
}

; 2048-bit splat: only checked when registers are 2048 bits wide.
define void @splat_v64f32(float %a, <64 x float>* %b) #0 {
; CHECK-LABEL: splat_v64f32:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, s0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <64 x float> undef, float %a, i64 0
  %splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
  store <64 x float> %splat, <64 x float>* %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 {
; No instruction is expected between the entry block label and the return.
; CHECK-LABEL: splat_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %insert = insertelement <1 x double> undef, double %a, i64 0
  %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %splat
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK: dup v0.2d, v0.d[0]
; CHECK-NEXT: ret
  %insert = insertelement <2 x double> undef, double %a, i64 0
  %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}

; 256-bit splat: a single predicated SVE store is expected at every checked width.
define void @splat_v4f64(double %a, <4 x double>* %b) #0 {
; CHECK-LABEL: splat_v4f64:
; CHECK-DAG: mov [[RES:z[0-9]+]].d, d0
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %insert = insertelement <4 x double> undef, double %a, i64 0
  %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
  store <4 x double> %splat, <4 x double>* %b
  ret void
}

; 512-bit splat: one store with registers of at least 512 bits, otherwise the
; value is split into two 256-bit stores (the second at %b + 32 bytes).
define void @splat_v8f64(double %a, <8 x double>* %b) #0 {
; CHECK-LABEL: splat_v8f64:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]]]
; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <8 x double> undef, double %a, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
  store <8 x double> %splat, <8 x double>* %b
  ret void
}

; 1024-bit splat: only checked when registers are at least 1024 bits wide.
define void @splat_v16f64(double %a, <16 x double>* %b) #0 {
; CHECK-LABEL: splat_v16f64:
; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %insert = insertelement <16 x double> undef, double %a, i64 0
  %splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
  store <16 x double> %splat, <16 x double>* %b
  ret void
}

; 2048-bit splat: only checked when registers are 2048 bits wide.
define void @splat_v32f64(double %a, <32 x double>* %b) #0 {
; CHECK-LABEL: splat_v32f64:
; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, d0
; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %insert = insertelement <32 x double> undef, double %a, i64 0
  %splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
  store <32 x double> %splat, <32 x double>* %b
  ret void
}

;
; DUP (integer immediate)
;

; Splat of the constant 1 over 512 bits; expected to use the immediate form of mov.
define void @splat_imm_v64i8(<64 x i8>* %a) #0 {
; CHECK-LABEL: splat_imm_v64i8:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, #1
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <64 x i8> undef, i8 1, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
  store <64 x i8> %splat, <64 x i8>* %a
  ret void
}

; Splat of the constant 2 over 512 bits.
define void @splat_imm_v32i16(<32 x i16>* %a) #0 {
; CHECK-LABEL: splat_imm_v32i16:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, #2
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <32 x i16> undef, i16 2, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
  store <32 x i16> %splat, <32 x i16>* %a
  ret void
}

; Splat of the constant 3 over 512 bits.
define void @splat_imm_v16i32(<16 x i32>* %a) #0 {
; CHECK-LABEL: splat_imm_v16i32:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, #3
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <16 x i32> undef, i32 3, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %splat, <16 x i32>* %a
  ret void
}

; Splat of the constant 4 over 512 bits.
define void @splat_imm_v8i64(<8 x i64>* %a) #0 {
; CHECK-LABEL: splat_imm_v8i64:
; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, #4
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <8 x i64> undef, i64 4, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
  store <8 x i64> %splat, <8 x i64>* %a
  ret void
}

;
; DUP (floating-point immediate)
;

; Splat of the constant 5.0 over 512 bits; expected to use the immediate form of fmov.
define void @splat_imm_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: splat_imm_v32f16:
; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].h, #5.00000000
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <32 x half> undef, half 5.0, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
  store <32 x half> %splat, <32 x half>* %a
  ret void
}

; Splat of the constant 6.0 over 512 bits.
define void @splat_imm_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: splat_imm_v16f32:
; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].s, #6.00000000
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <16 x float> undef, float 6.0, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
  store <16 x float> %splat, <16 x float>* %a
  ret void
}

; Splat of the constant 7.0 over 512 bits.
define void @splat_imm_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: splat_imm_v8f64:
; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].d, #7.00000000
; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
  %insert = insertelement <8 x double> undef, double 7.0, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
  store <8 x double> %splat, <8 x double>* %a
  ret void
}

; Every function in this file is compiled with SVE enabled.
attributes #0 = { "target-features"="+sve" }