; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; ST2B
;

define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
; CHECK-LABEL: st2b_i8:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %1 = getelementptr i8, i8* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %1)
  ret void
}

;
; ST2H
;

define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
; CHECK-LABEL: st2h_i16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr i16, i16* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i1> %pred,
                                          i16* %1)
  ret void
}

define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
; CHECK-LABEL: st2h_f16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr half, half* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x i1> %pred,
                                          half* %1)
  ret void
}

;
; ST2W
;

define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
; CHECK-LABEL: st2w_i32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr i32, i32* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i1> %pred,
                                          i32* %1)
  ret void
}

define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
; CHECK-LABEL: st2w_f32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr float, float* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x i1> %pred,
                                          float* %1)
  ret void
}

;
; ST2D
;

define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
; CHECK-LABEL: st2d_i64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr i64, i64* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i1> %pred,
                                          i64* %1)
  ret void
}

define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
; CHECK-LABEL: st2d_f64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr double, double* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x i1> %pred,
                                          double* %1)
  ret void
}

;
; ST3B
;

define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
; CHECK-LABEL: st3b_i8:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %1 = getelementptr i8, i8* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %1)
  ret void
}

;
; ST3H
;

define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
; CHECK-LABEL: st3h_i16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr i16, i16* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i1> %pred,
                                          i16* %1)
  ret void
}

define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
; CHECK-LABEL: st3h_f16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr half, half* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x i1> %pred,
                                          half* %1)
  ret void
}

;
; ST3W
;

define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
; CHECK-LABEL: st3w_i32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr i32, i32* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i1> %pred,
                                          i32* %1)
  ret void
}

define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
; CHECK-LABEL: st3w_f32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr float, float* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x i1> %pred,
                                          float* %1)
  ret void
}

;
; ST3D
;

define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
; CHECK-LABEL: st3d_i64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr i64, i64* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i1> %pred,
                                          i64* %1)
  ret void
}

define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
; CHECK-LABEL: st3d_f64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr double, double* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x i1> %pred,
                                          double* %1)
  ret void
}

;
; ST4B
;

define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
; CHECK-LABEL: st4b_i8:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %1 = getelementptr i8, i8* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %1)
  ret void
}

;
; ST4H
;

define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
; CHECK-LABEL: st4h_i16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr i16, i16* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i16> %v3,
                                          <vscale x 8 x i1> %pred,
                                          i16* %1)
  ret void
}

define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
; CHECK-LABEL: st4h_f16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr half, half* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x half> %v3,
                                          <vscale x 8 x i1> %pred,
                                          half* %1)
  ret void
}

;
; ST4W
;

define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
; CHECK-LABEL: st4w_i32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr i32, i32* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i32> %v3,
                                          <vscale x 4 x i1> %pred,
                                          i32* %1)
  ret void
}

define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
; CHECK-LABEL: st4w_f32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr float, float* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x float> %v3,
                                          <vscale x 4 x i1> %pred,
                                          float* %1)
  ret void
}

;
; ST4D
;

define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
; CHECK-LABEL: st4d_i64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr i64, i64* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i64> %v3,
                                          <vscale x 2 x i1> %pred,
                                          i64* %1)
  ret void
}

define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
; CHECK-LABEL: st4d_f64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr double, double* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x double> %v3,
                                          <vscale x 2 x i1> %pred,
                                          double* %1)
  ret void
}

declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)