1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s 2 3define void @st1lane_16b(<16 x i8> %A, i8* %D) { 4; CHECK-LABEL: st1lane_16b 5; CHECK: st1.b 6 %tmp = extractelement <16 x i8> %A, i32 1 7 store i8 %tmp, i8* %D 8 ret void 9} 10 11define void @st1lane_8h(<8 x i16> %A, i16* %D) { 12; CHECK-LABEL: st1lane_8h 13; CHECK: st1.h 14 %tmp = extractelement <8 x i16> %A, i32 1 15 store i16 %tmp, i16* %D 16 ret void 17} 18 19define void @st1lane_4s(<4 x i32> %A, i32* %D) { 20; CHECK-LABEL: st1lane_4s 21; CHECK: st1.s 22 %tmp = extractelement <4 x i32> %A, i32 1 23 store i32 %tmp, i32* %D 24 ret void 25} 26 27define void @st1lane_4s_float(<4 x float> %A, float* %D) { 28; CHECK-LABEL: st1lane_4s_float 29; CHECK: st1.s 30 %tmp = extractelement <4 x float> %A, i32 1 31 store float %tmp, float* %D 32 ret void 33} 34 35define void @st1lane_2d(<2 x i64> %A, i64* %D) { 36; CHECK-LABEL: st1lane_2d 37; CHECK: st1.d 38 %tmp = extractelement <2 x i64> %A, i32 1 39 store i64 %tmp, i64* %D 40 ret void 41} 42 43define void @st1lane_2d_double(<2 x double> %A, double* %D) { 44; CHECK-LABEL: st1lane_2d_double 45; CHECK: st1.d 46 %tmp = extractelement <2 x double> %A, i32 1 47 store double %tmp, double* %D 48 ret void 49} 50 51define void @st1lane_8b(<8 x i8> %A, i8* %D) { 52; CHECK-LABEL: st1lane_8b 53; CHECK: st1.b 54 %tmp = extractelement <8 x i8> %A, i32 1 55 store i8 %tmp, i8* %D 56 ret void 57} 58 59define void @st1lane_4h(<4 x i16> %A, i16* %D) { 60; CHECK-LABEL: st1lane_4h 61; CHECK: st1.h 62 %tmp = extractelement <4 x i16> %A, i32 1 63 store i16 %tmp, i16* %D 64 ret void 65} 66 67define void @st1lane_2s(<2 x i32> %A, i32* %D) { 68; CHECK-LABEL: st1lane_2s 69; CHECK: st1.s 70 %tmp = extractelement <2 x i32> %A, i32 1 71 store i32 %tmp, i32* %D 72 ret void 73} 74 75define void @st1lane_2s_float(<2 x float> %A, float* %D) { 76; CHECK-LABEL: st1lane_2s_float 77; CHECK: st1.s 78 %tmp = extractelement <2 x float> %A, i32 1 79 store float %tmp, float* %D 80 ret void 81} 82 83define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) { 84; CHECK-LABEL: st2lane_16b 85; CHECK: st2.b 86 call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D) 87 ret void 88} 89 90define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) { 91; CHECK-LABEL: st2lane_8h 92; CHECK: st2.h 93 call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D) 94 ret void 95} 96 97define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) { 98; CHECK-LABEL: st2lane_4s 99; CHECK: st2.s 100 call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D) 101 ret void 102} 103 104define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) { 105; CHECK-LABEL: st2lane_2d 106; CHECK: st2.d 107 call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D) 108 ret void 109} 110 111declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone 112declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone 113declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone 114declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone 115 116define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) { 117; CHECK-LABEL: st3lane_16b 118; CHECK: st3.b 119 call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D) 120 ret void 121} 122 123define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) { 124; CHECK-LABEL: st3lane_8h 125; CHECK: st3.h 126 call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D) 127 ret void 128} 129 130define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) { 131; CHECK-LABEL: st3lane_4s 132; CHECK: st3.s 133 call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D) 134 ret void 135} 136 137define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) { 138; CHECK-LABEL: st3lane_2d 139; CHECK: st3.d 140 call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D) 141 ret void 142} 143 144declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone 145declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone 146declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone 147declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone 148 149define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) { 150; CHECK-LABEL: st4lane_16b 151; CHECK: st4.b 152 call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E) 153 ret void 154} 155 156define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) { 157; CHECK-LABEL: st4lane_8h 158; CHECK: st4.h 159 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E) 160 ret void 161} 162 163define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) { 164; CHECK-LABEL: st4lane_4s 165; CHECK: st4.s 166 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E) 167 ret void 168} 169 170define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) { 171; CHECK-LABEL: st4lane_2d 172; CHECK: st4.d 173 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E) 174 ret void 175} 176 177declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone 178declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone 179declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone 180declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone 181 182 183define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind { 184; CHECK-LABEL: st2_8b 185; CHECK st2.8b 186 call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) 187 ret void 188} 189 190define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind { 191; CHECK-LABEL: st3_8b 192; CHECK st3.8b 193 call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) 194 ret void 195} 196 197define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind { 198; CHECK-LABEL: st4_8b 199; CHECK st4.8b 200 call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) 201 ret void 202} 203 204declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly 205declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 206declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 207 208define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind { 209; CHECK-LABEL: st2_16b 210; CHECK st2.16b 211 call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) 212 ret void 213} 214 215define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind { 216; CHECK-LABEL: st3_16b 217; CHECK st3.16b 218 call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) 219 ret void 220} 221 222define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind { 223; CHECK-LABEL: st4_16b 224; CHECK st4.16b 225 call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) 226 ret void 227} 228 229declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly 230declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 231declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 232 233define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind { 234; CHECK-LABEL: st2_4h 235; CHECK st2.4h 236 call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) 237 ret void 238} 239 240define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind { 241; CHECK-LABEL: st3_4h 242; CHECK st3.4h 243 call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) 244 ret void 245} 246 247define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind { 248; CHECK-LABEL: st4_4h 249; CHECK st4.4h 250 call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) 251 ret void 252} 253 254declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly 255declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 256declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 257 258define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind { 259; CHECK-LABEL: st2_8h 260; CHECK st2.8h 261 call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) 262 ret void 263} 264 265define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind { 266; CHECK-LABEL: st3_8h 267; CHECK st3.8h 268 call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) 269 ret void 270} 271 272define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind { 273; CHECK-LABEL: st4_8h 274; CHECK st4.8h 275 call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) 276 ret void 277} 278 279declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly 280declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 281declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 282 283define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind { 284; CHECK-LABEL: st2_2s 285; CHECK st2.2s 286 call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) 287 ret void 288} 289 290define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind { 291; CHECK-LABEL: st3_2s 292; CHECK st3.2s 293 call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) 294 ret void 295} 296 297define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind { 298; CHECK-LABEL: st4_2s 299; CHECK st4.2s 300 call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) 301 ret void 302} 303 304declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly 305declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 306declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 307 308define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind { 309; CHECK-LABEL: st2_4s 310; CHECK st2.4s 311 call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) 312 ret void 313} 314 315define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind { 316; CHECK-LABEL: st3_4s 317; CHECK st3.4s 318 call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) 319 ret void 320} 321 322define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind { 323; CHECK-LABEL: st4_4s 324; CHECK st4.4s 325 call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) 326 ret void 327} 328 329declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly 330declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 331declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 332 333define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind { 334; CHECK-LABEL: st2_1d 335; CHECK st1.2d 336 call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P) 337 ret void 338} 339 340define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind { 341; CHECK-LABEL: st3_1d 342; CHECK st1.3d 343 call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) 344 ret void 345} 346 347define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind { 348; CHECK-LABEL: st4_1d 349; CHECK st1.4d 350 call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) 351 ret void 352} 353 354declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly 355declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 356declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 357 358define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind { 359; CHECK-LABEL: st2_2d 360; CHECK st2.2d 361 call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) 362 ret void 363} 364 365define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind { 366; CHECK-LABEL: st3_2d 367; CHECK st2.3d 368 call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) 369 ret void 370} 371 372define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind { 373; CHECK-LABEL: st4_2d 374; CHECK st2.4d 375 call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) 376 ret void 377} 378 379declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly 380declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 381declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 382 383declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly 384declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly 385declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly 386declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly 387declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly 388declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly 389 390define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) { 391; CHECK-LABEL: st1_x2_v8i8: 392; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 393 call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) 394 ret void 395} 396 397define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) { 398; CHECK-LABEL: st1_x2_v4i16: 399; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 400 call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) 401 ret void 402} 403 404define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) { 405; CHECK-LABEL: st1_x2_v2i32: 406; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 407 call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) 408 ret void 409} 410 411define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) { 412; CHECK-LABEL: st1_x2_v2f32: 413; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 414 call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr) 415 ret void 416} 417 418define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) { 419; CHECK-LABEL: st1_x2_v1i64: 420; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 421 call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) 422 ret void 423} 424 425define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) { 426; CHECK-LABEL: st1_x2_v1f64: 427; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 428 call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr) 429 ret void 430} 431 432declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly 433declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly 434declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly 435declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly 436declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly 437declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly 438 439define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) { 440; CHECK-LABEL: st1_x2_v16i8: 441; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 442 call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) 443 ret void 444} 445 446define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) { 447; CHECK-LABEL: st1_x2_v8i16: 448; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 449 call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) 450 ret void 451} 452 453define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) { 454; CHECK-LABEL: st1_x2_v4i32: 455; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 456 call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) 457 ret void 458} 459 460define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) { 461; CHECK-LABEL: st1_x2_v4f32: 462; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 463 call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr) 464 ret void 465} 466 467define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) { 468; CHECK-LABEL: st1_x2_v2i64: 469; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 470 call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) 471 ret void 472} 473 474define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) { 475; CHECK-LABEL: st1_x2_v2f64: 476; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 477 call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr) 478 ret void 479} 480 481declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 482declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 483declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 484declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly 485declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 486declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly 487 488define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) { 489; CHECK-LABEL: st1_x3_v8i8: 490; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 491 call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) 492 ret void 493} 494 495define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) { 496; CHECK-LABEL: st1_x3_v4i16: 497; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 498 call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) 499 ret void 500} 501 502define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) { 503; CHECK-LABEL: st1_x3_v2i32: 504; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 505 call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) 506 ret void 507} 508 509define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) { 510; CHECK-LABEL: st1_x3_v2f32: 511; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 512 call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) 513 ret void 514} 515 516define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) { 517; CHECK-LABEL: st1_x3_v1i64: 518; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 519 call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) 520 ret void 521} 522 523define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) { 524; CHECK-LABEL: st1_x3_v1f64: 525; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 526 call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) 527 ret void 528} 529 530declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 531declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 532declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 533declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly 534declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 535declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly 536 537define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) { 538; CHECK-LABEL: st1_x3_v16i8: 539; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 540 call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) 541 ret void 542} 543 544define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) { 545; CHECK-LABEL: st1_x3_v8i16: 546; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 547 call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) 548 ret void 549} 550 551define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) { 552; CHECK-LABEL: st1_x3_v4i32: 553; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 554 call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) 555 ret void 556} 557 558define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) { 559; CHECK-LABEL: st1_x3_v4f32: 560; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 561 call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) 562 ret void 563} 564 565define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) { 566; CHECK-LABEL: st1_x3_v2i64: 567; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 568 call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) 569 ret void 570} 571 572define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) { 573; CHECK-LABEL: st1_x3_v2f64: 574; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 575 call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) 576 ret void 577} 578 579 580declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 581declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 582declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 583declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly 584declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 585declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly 586 587define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) { 588; CHECK-LABEL: st1_x4_v8i8: 589; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 590 call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) 591 ret void 592} 593 594define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) { 595; CHECK-LABEL: st1_x4_v4i16: 596; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 597 call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) 598 ret void 599} 600 601define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) { 602; CHECK-LABEL: st1_x4_v2i32: 603; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 604 call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) 605 ret void 606} 607 608define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) { 609; CHECK-LABEL: st1_x4_v2f32: 610; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 611 call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) 612 ret void 613} 614 615define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) { 616; CHECK-LABEL: st1_x4_v1i64: 617; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 618 call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) 619 ret void 620} 621 622define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) { 623; CHECK-LABEL: st1_x4_v1f64: 624; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 625 call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) 626 ret void 627} 628 629declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 630declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 631declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 632declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly 633declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 634declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly 635 636define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) { 637; CHECK-LABEL: st1_x4_v16i8: 638; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 639 call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) 640 ret void 641} 642 643define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) { 644; CHECK-LABEL: st1_x4_v8i16: 645; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 646 call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) 647 ret void 648} 649 650define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) { 651; CHECK-LABEL: st1_x4_v4i32: 652; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 653 call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) 654 ret void 655} 656 657define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) { 658; CHECK-LABEL: st1_x4_v4f32: 659; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 660 call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) 661 ret void 662} 663 664define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) { 665; CHECK-LABEL: st1_x4_v2i64: 666; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 667 call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) 668 ret void 669} 670 671define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) { 672; CHECK-LABEL: st1_x4_v2f64: 673; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 674 call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) 675 ret void 676} 677