; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; NOTE: invalid, upper and lower bound immediate values of the reg+imm
; addressing mode are checked only for the byte version of each
; instruction (`st<N>b`), as the code for detecting the immediate is
; common to all instructions, and varies only for the number of
; elements of the structured store, which is <N> = 2, 3, 4.
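; As exercised by the lower/upper bound tests below, the reg+imm form of
; st<N> is assumed here to take an immediate that is a multiple of <N> in
; the range [-8*<N>, 7*<N>] "mul vl" (i.e. [-16, 14] for st2b, [-24, 21] for
; st3b and [-32, 28] for st4b); any other offset is materialised with rdvl
; and the reg+reg form instead.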

;
; ST2B
;

define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
; CHECK: rdvl x[[N:[0-9]+]], #3
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
; CHECK: rdvl x[[N:[0-9]+]], #-18
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -18, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
; CHECK: rdvl x[[N:[0-9]+]], #16
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 16, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST2H
;

define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st2h_i16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st2h_f16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST2W
;

define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st2w_i32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st2w_f32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST2D
;

define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st2d_i64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 8, i64 0
  call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st2d_f64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 10, i64 0
  call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

;
; ST3B
;

define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
; CHECK: rdvl x[[N:[0-9]+]], #4
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
; CHECK: rdvl x[[N:[0-9]+]], #5
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
; CHECK: rdvl x[[N:[0-9]+]], #-27
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
; CHECK: rdvl x[[N:[0-9]+]], #24
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST3H
;

define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st3h_i16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st3h_f16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9, i64 0
  call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST3W
;

define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st3w_i32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12, i64 0
  call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st3w_f32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15, i64 0
  call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST3D
;

define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st3d_i64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18, i64 0
  call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st3d_f64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3, i64 0
  call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

;
; ST4B
;

define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
; CHECK: rdvl x[[N:[0-9]+]], #5
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
; CHECK: rdvl x[[N:[0-9]+]], #6
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
; CHECK: rdvl x[[N:[0-9]+]], #7
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound:
; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
; xM = -9 * 2^6
; xP = RDVL * 2^-4
; xBASE = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36
; CHECK: rdvl x[[N:[0-9]+]], #1
; CHECK-DAG: mov x[[M:[0-9]+]], #-576
; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4
; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -36, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound:
; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
; xM = 2^9
; xP = RDVL * 2^-4
; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32
; CHECK: rdvl x[[N:[0-9]+]], #1
; CHECK-DAG: mov w[[M:[0-9]+]], #512
; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4
; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 32, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST4H
;

define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st4h_i16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8, i64 0
  call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i16> %v3,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st4h_f16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12, i64 0
  call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x half> %v3,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST4W
;

define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st4w_i32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16, i64 0
  call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i32> %v3,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st4w_f32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20, i64 0
  call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x float> %v3,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST4D
;

define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st4d_i64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24, i64 0
  call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i64> %v3,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st4d_f64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28, i64 0
  call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x double> %v3,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)