1; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s 2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s 3; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check. 4 5define void @st1lane_16b(<16 x i8> %A, i8* %D) { 6; CHECK-LABEL: st1lane_16b 7; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}] 8 %ptr = getelementptr i8, i8* %D, i64 1 9 %tmp = extractelement <16 x i8> %A, i32 1 10 store i8 %tmp, i8* %ptr 11 ret void 12} 13 14define void @st1lane0_16b(<16 x i8> %A, i8* %D) { 15; CHECK-LABEL: st1lane0_16b 16; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}] 17 %ptr = getelementptr i8, i8* %D, i64 1 18 %tmp = extractelement <16 x i8> %A, i32 0 19 store i8 %tmp, i8* %ptr 20 ret void 21} 22 23define void @st1lane0u_16b(<16 x i8> %A, i8* %D) { 24; CHECK-LABEL: st1lane0u_16b 25; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}] 26 %ptr = getelementptr i8, i8* %D, i64 -1 27 %tmp = extractelement <16 x i8> %A, i32 0 28 store i8 %tmp, i8* %ptr 29 ret void 30} 31 32define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) { 33; CHECK-LABEL: st1lane_ro_16b 34; CHECK: add x[[XREG:[0-9]+]], x0, x1 35; CHECK: st1.b { v0 }[1], [x[[XREG]]] 36 %ptr = getelementptr i8, i8* %D, i64 %offset 37 %tmp = extractelement <16 x i8> %A, i32 1 38 store i8 %tmp, i8* %ptr 39 ret void 40} 41 42define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) { 43; CHECK-LABEL: st1lane0_ro_16b 44; CHECK: add x[[XREG:[0-9]+]], x0, x1 45; CHECK: st1.b { v0 }[0], [x[[XREG]]] 46 %ptr = getelementptr i8, i8* %D, i64 %offset 47 %tmp = extractelement <16 x i8> %A, i32 0 48 store i8 %tmp, i8* %ptr 49 ret void 50} 51 52define void @st1lane_8h(<8 x i16> %A, i16* %D) { 53; CHECK-LABEL: st1lane_8h 54; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}] 55 %ptr = getelementptr i16, i16* %D, i64 1 56 %tmp = extractelement <8 x i16> %A, i32 1 57 store i16 %tmp, i16* %ptr 58 ret void 59} 60 61define void @st1lane0_8h(<8 x i16> %A, i16* %D) { 62; CHECK-LABEL: st1lane0_8h 63; CHECK: str h0, [x0, #2] 64 %ptr = getelementptr i16, i16* %D, i64 1 65 %tmp = extractelement <8 x i16> %A, i32 0 66 store i16 %tmp, i16* %ptr 67 ret void 68} 69 70define void @st1lane0u_8h(<8 x i16> %A, i16* %D) { 71; CHECK-LABEL: st1lane0u_8h 72; CHECK: stur h0, [x0, #-2] 73 %ptr = getelementptr i16, i16* %D, i64 -1 74 %tmp = extractelement <8 x i16> %A, i32 0 75 store i16 %tmp, i16* %ptr 76 ret void 77} 78 79define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) { 80; CHECK-LABEL: st1lane_ro_8h 81; CHECK: add x[[XREG:[0-9]+]], x0, x1 82; CHECK: st1.h { v0 }[1], [x[[XREG]]] 83 %ptr = getelementptr i16, i16* %D, i64 %offset 84 %tmp = extractelement <8 x i16> %A, i32 1 85 store i16 %tmp, i16* %ptr 86 ret void 87} 88 89define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) { 90; CHECK-LABEL: st1lane0_ro_8h 91; CHECK: str h0, [x0, x1, lsl #1] 92 %ptr = getelementptr i16, i16* %D, i64 %offset 93 %tmp = extractelement <8 x i16> %A, i32 0 94 store i16 %tmp, i16* %ptr 95 ret void 96} 97 98define void @st1lane_4s(<4 x i32> %A, i32* %D) { 99; CHECK-LABEL: st1lane_4s 100; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] 101 %ptr = getelementptr i32, i32* %D, i64 1 102 %tmp = extractelement <4 x i32> %A, i32 1 103 store i32 %tmp, i32* %ptr 104 ret void 105} 106 107define void @st1lane0_4s(<4 x i32> %A, i32* %D) { 108; CHECK-LABEL: st1lane0_4s 109; CHECK: str s0, [x0, #4] 110 %ptr = getelementptr i32, i32* %D, i64 1 111 %tmp = extractelement <4 x i32> %A, i32 0 112 store i32 %tmp, i32* %ptr 113 ret void 114} 115 116define void @st1lane0u_4s(<4 x i32> %A, i32* %D) { 117; CHECK-LABEL: st1lane0u_4s 118; CHECK: stur s0, [x0, #-4] 119 %ptr = getelementptr i32, i32* %D, i64 -1 120 %tmp = extractelement <4 x i32> %A, i32 0 121 store i32 %tmp, i32* %ptr 122 ret void 123} 124 125define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) { 126; CHECK-LABEL: st1lane_ro_4s 127; CHECK: add x[[XREG:[0-9]+]], x0, x1 128; CHECK: st1.s { v0 }[1], [x[[XREG]]] 129 %ptr = getelementptr i32, i32* %D, i64 %offset 130 %tmp = extractelement <4 x i32> %A, i32 1 131 store i32 %tmp, i32* %ptr 132 ret void 133} 134 135define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) { 136; CHECK-LABEL: st1lane0_ro_4s 137; CHECK: str s0, [x0, x1, lsl #2] 138 %ptr = getelementptr i32, i32* %D, i64 %offset 139 %tmp = extractelement <4 x i32> %A, i32 0 140 store i32 %tmp, i32* %ptr 141 ret void 142} 143 144define void @st1lane_4s_float(<4 x float> %A, float* %D) { 145; CHECK-LABEL: st1lane_4s_float 146; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] 147 %ptr = getelementptr float, float* %D, i64 1 148 %tmp = extractelement <4 x float> %A, i32 1 149 store float %tmp, float* %ptr 150 ret void 151} 152 153define void @st1lane0_4s_float(<4 x float> %A, float* %D) { 154; CHECK-LABEL: st1lane0_4s_float 155; CHECK: str s0, [x0, #4] 156 %ptr = getelementptr float, float* %D, i64 1 157 %tmp = extractelement <4 x float> %A, i32 0 158 store float %tmp, float* %ptr 159 ret void 160} 161 162define void @st1lane0u_4s_float(<4 x float> %A, float* %D) { 163; CHECK-LABEL: st1lane0u_4s_float 164; CHECK: stur s0, [x0, #-4] 165 %ptr = getelementptr float, float* %D, i64 -1 166 %tmp = extractelement <4 x float> %A, i32 0 167 store float %tmp, float* %ptr 168 ret void 169} 170 171define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) { 172; CHECK-LABEL: st1lane_ro_4s_float 173; CHECK: add x[[XREG:[0-9]+]], x0, x1 174; CHECK: st1.s { v0 }[1], [x[[XREG]]] 175 %ptr = getelementptr float, float* %D, i64 %offset 176 %tmp = extractelement <4 x float> %A, i32 1 177 store float %tmp, float* %ptr 178 ret void 179} 180 181define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) { 182; CHECK-LABEL: st1lane0_ro_4s_float 183; CHECK: str s0, [x0, x1, lsl #2] 184 %ptr = getelementptr float, float* %D, i64 %offset 185 %tmp = extractelement <4 x float> %A, i32 0 186 store float %tmp, float* %ptr 187 ret void 188} 189 190define void @st1lane_2d(<2 x i64> %A, i64* %D) { 191; CHECK-LABEL: st1lane_2d 192; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}] 193 %ptr = getelementptr i64, i64* %D, i64 1 194 %tmp = extractelement <2 x i64> %A, i32 1 195 store i64 %tmp, i64* %ptr 196 ret void 197} 198 199define void @st1lane0_2d(<2 x i64> %A, i64* %D) { 200; CHECK-LABEL: st1lane0_2d 201; CHECK: str d0, [x0, #8] 202 %ptr = getelementptr i64, i64* %D, i64 1 203 %tmp = extractelement <2 x i64> %A, i32 0 204 store i64 %tmp, i64* %ptr 205 ret void 206} 207 208define void @st1lane0u_2d(<2 x i64> %A, i64* %D) { 209; CHECK-LABEL: st1lane0u_2d 210; CHECK: stur d0, [x0, #-8] 211 %ptr = getelementptr i64, i64* %D, i64 -1 212 %tmp = extractelement <2 x i64> %A, i32 0 213 store i64 %tmp, i64* %ptr 214 ret void 215} 216 217define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) { 218; CHECK-LABEL: st1lane_ro_2d 219; CHECK: add x[[XREG:[0-9]+]], x0, x1 220; CHECK: st1.d { v0 }[1], [x[[XREG]]] 221 %ptr = getelementptr i64, i64* %D, i64 %offset 222 %tmp = extractelement <2 x i64> %A, i32 1 223 store i64 %tmp, i64* %ptr 224 ret void 225} 226 227define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) { 228; CHECK-LABEL: st1lane0_ro_2d 229; CHECK: str d0, [x0, x1, lsl #3] 230 %ptr = getelementptr i64, i64* %D, i64 %offset 231 %tmp = extractelement <2 x i64> %A, i32 0 232 store i64 %tmp, i64* %ptr 233 ret void 234} 235 236define void @st1lane_2d_double(<2 x double> %A, double* %D) { 237; CHECK-LABEL: st1lane_2d_double 238; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}] 239 %ptr = getelementptr double, double* %D, i64 1 240 %tmp = extractelement <2 x double> %A, i32 1 241 store double %tmp, double* %ptr 242 ret void 243} 244 245define void @st1lane0_2d_double(<2 x double> %A, double* %D) { 246; CHECK-LABEL: st1lane0_2d_double 247; CHECK: str d0, [x0, #8] 248 %ptr = getelementptr double, double* %D, i64 1 249 %tmp = extractelement <2 x double> %A, i32 0 250 store double %tmp, double* %ptr 251 ret void 252} 253 254define void @st1lane0u_2d_double(<2 x double> %A, double* %D) { 255; CHECK-LABEL: st1lane0u_2d_double 256; CHECK: stur d0, [x0, #-8] 257 %ptr = getelementptr double, double* %D, i64 -1 258 %tmp = extractelement <2 x double> %A, i32 0 259 store double %tmp, double* %ptr 260 ret void 261} 262 263define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) { 264; CHECK-LABEL: st1lane_ro_2d_double 265; CHECK: add x[[XREG:[0-9]+]], x0, x1 266; CHECK: st1.d { v0 }[1], [x[[XREG]]] 267 %ptr = getelementptr double, double* %D, i64 %offset 268 %tmp = extractelement <2 x double> %A, i32 1 269 store double %tmp, double* %ptr 270 ret void 271} 272 273define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) { 274; CHECK-LABEL: st1lane0_ro_2d_double 275; CHECK: str d0, [x0, x1, lsl #3] 276 %ptr = getelementptr double, double* %D, i64 %offset 277 %tmp = extractelement <2 x double> %A, i32 0 278 store double %tmp, double* %ptr 279 ret void 280} 281 282define void @st1lane_8b(<8 x i8> %A, i8* %D) { 283; CHECK-LABEL: st1lane_8b 284; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}] 285 %ptr = getelementptr i8, i8* %D, i64 1 286 %tmp = extractelement <8 x i8> %A, i32 1 287 store i8 %tmp, i8* %ptr 288 ret void 289} 290 291define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) { 292; CHECK-LABEL: st1lane_ro_8b 293; CHECK: add x[[XREG:[0-9]+]], x0, x1 294; CHECK: st1.b { v0 }[1], [x[[XREG]]] 295 %ptr = getelementptr i8, i8* %D, i64 %offset 296 %tmp = extractelement <8 x i8> %A, i32 1 297 store i8 %tmp, i8* %ptr 298 ret void 299} 300 301define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) { 302; CHECK-LABEL: st1lane0_ro_8b 303; CHECK: add x[[XREG:[0-9]+]], x0, x1 304; CHECK: st1.b { v0 }[0], [x[[XREG]]] 305 %ptr = getelementptr i8, i8* %D, i64 %offset 306 %tmp = extractelement <8 x i8> %A, i32 0 307 store i8 %tmp, i8* %ptr 308 ret void 309} 310 311define void @st1lane_4h(<4 x i16> %A, i16* %D) { 312; CHECK-LABEL: st1lane_4h 313; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}] 314 %ptr = getelementptr i16, i16* %D, i64 1 315 %tmp = extractelement <4 x i16> %A, i32 1 316 store i16 %tmp, i16* %ptr 317 ret void 318} 319 320define void @st1lane0_4h(<4 x i16> %A, i16* %D) { 321; CHECK-LABEL: st1lane0_4h 322; CHECK: str h0, [x0, #2] 323 %ptr = getelementptr i16, i16* %D, i64 1 324 %tmp = extractelement <4 x i16> %A, i32 0 325 store i16 %tmp, i16* %ptr 326 ret void 327} 328 329define void @st1lane0u_4h(<4 x i16> %A, i16* %D) { 330; CHECK-LABEL: st1lane0u_4h 331; CHECK: stur h0, [x0, #-2] 332 %ptr = getelementptr i16, i16* %D, i64 -1 333 %tmp = extractelement <4 x i16> %A, i32 0 334 store i16 %tmp, i16* %ptr 335 ret void 336} 337 338define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) { 339; CHECK-LABEL: st1lane_ro_4h 340; CHECK: add x[[XREG:[0-9]+]], x0, x1 341; CHECK: st1.h { v0 }[1], [x[[XREG]]] 342 %ptr = getelementptr i16, i16* %D, i64 %offset 343 %tmp = extractelement <4 x i16> %A, i32 1 344 store i16 %tmp, i16* %ptr 345 ret void 346} 347 348define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) { 349; CHECK-LABEL: st1lane0_ro_4h 350; CHECK: str h0, [x0, x1, lsl #1] 351 %ptr = getelementptr i16, i16* %D, i64 %offset 352 %tmp = extractelement <4 x i16> %A, i32 0 353 store i16 %tmp, i16* %ptr 354 ret void 355} 356 357define void @st1lane_2s(<2 x i32> %A, i32* %D) { 358; CHECK-LABEL: st1lane_2s 359; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] 360 %ptr = getelementptr i32, i32* %D, i64 1 361 %tmp = extractelement <2 x i32> %A, i32 1 362 store i32 %tmp, i32* %ptr 363 ret void 364} 365 366define void @st1lane0_2s(<2 x i32> %A, i32* %D) { 367; CHECK-LABEL: st1lane0_2s 368; CHECK: str s0, [x0, #4] 369 %ptr = getelementptr i32, i32* %D, i64 1 370 %tmp = extractelement <2 x i32> %A, i32 0 371 store i32 %tmp, i32* %ptr 372 ret void 373} 374 375define void @st1lane0u_2s(<2 x i32> %A, i32* %D) { 376; CHECK-LABEL: st1lane0u_2s 377; CHECK: stur s0, [x0, #-4] 378 %ptr = getelementptr i32, i32* %D, i64 -1 379 %tmp = extractelement <2 x i32> %A, i32 0 380 store i32 %tmp, i32* %ptr 381 ret void 382} 383 384define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) { 385; CHECK-LABEL: st1lane_ro_2s 386; CHECK: add x[[XREG:[0-9]+]], x0, x1 387; CHECK: st1.s { v0 }[1], [x[[XREG]]] 388 %ptr = getelementptr i32, i32* %D, i64 %offset 389 %tmp = extractelement <2 x i32> %A, i32 1 390 store i32 %tmp, i32* %ptr 391 ret void 392} 393 394define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) { 395; CHECK-LABEL: st1lane0_ro_2s 396; CHECK: str s0, [x0, x1, lsl #2] 397 %ptr = getelementptr i32, i32* %D, i64 %offset 398 %tmp = extractelement <2 x i32> %A, i32 0 399 store i32 %tmp, i32* %ptr 400 ret void 401} 402 403define void @st1lane_2s_float(<2 x float> %A, float* %D) { 404; CHECK-LABEL: st1lane_2s_float 405; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] 406 %ptr = getelementptr float, float* %D, i64 1 407 %tmp = extractelement <2 x float> %A, i32 1 408 store float %tmp, float* %ptr 409 ret void 410} 411 412define void @st1lane0_2s_float(<2 x float> %A, float* %D) { 413; CHECK-LABEL: st1lane0_2s_float 414; CHECK: str s0, [x0, #4] 415 %ptr = getelementptr float, float* %D, i64 1 416 %tmp = extractelement <2 x float> %A, i32 0 417 store float %tmp, float* %ptr 418 ret void 419} 420 421define void @st1lane0u_2s_float(<2 x float> %A, float* %D) { 422; CHECK-LABEL: st1lane0u_2s_float 423; CHECK: stur s0, [x0, #-4] 424 %ptr = getelementptr float, float* %D, i64 -1 425 %tmp = extractelement <2 x float> %A, i32 0 426 store float %tmp, float* %ptr 427 ret void 428} 429 430define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) { 431; CHECK-LABEL: st1lane_ro_2s_float 432; CHECK: add x[[XREG:[0-9]+]], x0, x1 433; CHECK: st1.s { v0 }[1], [x[[XREG]]] 434 %ptr = getelementptr float, float* %D, i64 %offset 435 %tmp = extractelement <2 x float> %A, i32 1 436 store float %tmp, float* %ptr 437 ret void 438} 439 440define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) { 441; CHECK-LABEL: st1lane0_ro_2s_float 442; CHECK: str s0, [x0, x1, lsl #2] 443 %ptr = getelementptr float, float* %D, i64 %offset 444 %tmp = extractelement <2 x float> %A, i32 0 445 store float %tmp, float* %ptr 446 ret void 447} 448 449define void @st1lane0_1d(<1 x i64> %A, i64* %D) { 450; CHECK-LABEL: st1lane0_1d 451; CHECK: str d0, [x0, #8] 452 %ptr = getelementptr i64, i64* %D, i64 1 453 %tmp = extractelement <1 x i64> %A, i32 0 454 store i64 %tmp, i64* %ptr 455 ret void 456} 457 458define void @st1lane0u_1d(<1 x i64> %A, i64* %D) { 459; CHECK-LABEL: st1lane0u_1d 460; CHECK: stur d0, [x0, #-8] 461 %ptr = getelementptr i64, i64* %D, i64 -1 462 %tmp = extractelement <1 x i64> %A, i32 0 463 store i64 %tmp, i64* %ptr 464 ret void 465} 466 467define void @st1lane0_ro_1d(<1 x i64> %A, i64* %D, i64 %offset) { 468; CHECK-LABEL: st1lane0_ro_1d 469; CHECK: str d0, [x0, x1, lsl #3] 470 %ptr = getelementptr i64, i64* %D, i64 %offset 471 %tmp = extractelement <1 x i64> %A, i32 0 472 store i64 %tmp, i64* %ptr 473 ret void 474} 475 476define void @st1lane0_1d_double(<1 x double> %A, double* %D) { 477; CHECK-LABEL: st1lane0_1d_double 478; CHECK: str d0, [x0, #8] 479 %ptr = getelementptr double, double* %D, i64 1 480 %tmp = extractelement <1 x double> %A, i32 0 481 store double %tmp, double* %ptr 482 ret void 483} 484 485define void @st1lane0u_1d_double(<1 x double> %A, double* %D) { 486; CHECK-LABEL: st1lane0u_1d_double 487; CHECK: stur d0, [x0, #-8] 488 %ptr = getelementptr double, double* %D, i64 -1 489 %tmp = extractelement <1 x double> %A, i32 0 490 store double %tmp, double* %ptr 491 ret void 492} 493 494define void @st1lane0_ro_1d_double(<1 x double> %A, double* %D, i64 %offset) { 495; CHECK-LABEL: st1lane0_ro_1d_double 496; CHECK: str d0, [x0, x1, lsl #3] 497 %ptr = getelementptr double, double* %D, i64 %offset 498 %tmp = extractelement <1 x double> %A, i32 0 499 store double %tmp, double* %ptr 500 ret void 501} 502 503define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) { 504; CHECK-LABEL: st2lane_16b 505; CHECK: st2.b 506 call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D) 507 ret void 508} 509 510define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) { 511; CHECK-LABEL: st2lane_8h 512; CHECK: st2.h 513 call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D) 514 ret void 515} 516 517define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) { 518; CHECK-LABEL: st2lane_4s 519; CHECK: st2.s 520 call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D) 521 ret void 522} 523 524define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) { 525; CHECK-LABEL: st2lane_2d 526; CHECK: st2.d 527 call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D) 528 ret void 529} 530 531declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone 532declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone 533declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone 534declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone 535 536define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) { 537; CHECK-LABEL: st3lane_16b 538; CHECK: st3.b 539 call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D) 540 ret void 541} 542 543define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) { 544; CHECK-LABEL: st3lane_8h 545; CHECK: st3.h 546 call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D) 547 ret void 548} 549 550define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) { 551; CHECK-LABEL: st3lane_4s 552; CHECK: st3.s 553 call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D) 554 ret void 555} 556 557define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) { 558; CHECK-LABEL: st3lane_2d 559; CHECK: st3.d 560 call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D) 561 ret void 562} 563 564declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone 565declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone 566declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone 567declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone 568 569define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) { 570; CHECK-LABEL: st4lane_16b 571; CHECK: st4.b 572 call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E) 573 ret void 574} 575 576define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) { 577; CHECK-LABEL: st4lane_8h 578; CHECK: st4.h 579 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E) 580 ret void 581} 582 583define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) { 584; CHECK-LABEL: st4lane_4s 585; CHECK: st4.s 586 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E) 587 ret void 588} 589 590define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) { 591; CHECK-LABEL: st4lane_2d 592; CHECK: st4.d 593 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E) 594 ret void 595} 596 597declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone 598declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone 599declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone 600declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone 601 602 603define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind { 604; CHECK-LABEL: st2_8b 605; CHECK: st2.8b 606; EXYNOS-LABEL: st2_8b 607; EXYNOS: zip1.8b 608; EXYNOS: zip2.8b 609; EXYNOS: stp 610 call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) 611 ret void 612} 613 614define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind { 615; CHECK-LABEL: st3_8b 616; CHECK: st3.8b 617 call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) 618 ret void 619} 620 621define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind { 622; CHECK-LABEL: st4_8b 623; CHECK: st4.8b 624; EXYNOS-LABEL: st4_8b 625; EXYNOS: zip1.8b 626; EXYNOS: zip2.8b 627; EXYNOS: zip1.8b 628; EXYNOS: zip2.8b 629; EXYNOS: zip1.8b 630; EXYNOS: zip2.8b 631; EXYNOS: stp 632; EXYNOS: zip1.8b 633; EXYNOS: zip2.8b 634; EXYNOS: stp 635 call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) 636 ret void 637} 638 639declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly 640declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 641declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 642 643define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind { 644; CHECK-LABEL: st2_16b 645; CHECK: st2.16b 646; EXYNOS-LABEL: st2_16b 647; EXYNOS: zip1.16b 648; EXYNOS: zip2.16b 649; EXYNOS: stp 650 call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) 651 ret void 652} 653 654define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind { 655; CHECK-LABEL: st3_16b 656; CHECK: st3.16b 657 call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) 658 ret void 659} 660 661define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind { 662; CHECK-LABEL: st4_16b 663; CHECK: st4.16b 664; EXYNOS-LABEL: st4_16b 665; EXYNOS: zip1.16b 666; EXYNOS: zip2.16b 667; EXYNOS: zip1.16b 668; EXYNOS: zip2.16b 669; EXYNOS: zip1.16b 670; EXYNOS: zip2.16b 671; EXYNOS: stp 672; EXYNOS: zip1.16b 673; EXYNOS: zip2.16b 674; EXYNOS: stp 675 call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) 676 ret void 677} 678 679declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly 680declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 681declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 682 683define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind { 684; CHECK-LABEL: st2_4h 685; CHECK: st2.4h 686; EXYNOS-LABEL: st2_4h 687; EXYNOS: zip1.4h 688; EXYNOS: zip2.4h 689; EXYNOS: stp 690 call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) 691 ret void 692} 693 694define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind { 695; CHECK-LABEL: st3_4h 696; CHECK: st3.4h 697 call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) 698 ret void 699} 700 701define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind { 702; CHECK-LABEL: st4_4h 703; CHECK: st4.4h 704; EXYNOS-LABEL: st4_4h 705; EXYNOS: zip1.4h 706; EXYNOS: zip2.4h 707; EXYNOS: zip1.4h 708; EXYNOS: zip2.4h 709; EXYNOS: zip1.4h 710; EXYNOS: zip2.4h 711; EXYNOS: stp 712; EXYNOS: zip1.4h 713; EXYNOS: zip2.4h 714; EXYNOS: stp 715 call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) 716 ret void 717} 718 719declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly 720declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 721declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 722 723define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind { 724; CHECK-LABEL: st2_8h 725; CHECK: st2.8h 726; EXYNOS-LABEL: st2_8h 727; EXYNOS: zip1.8h 728; EXYNOS: zip2.8h 729; EXYNOS: stp 730 call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) 731 ret void 732} 733 734define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind { 735; CHECK-LABEL: st3_8h 736; CHECK: st3.8h 737 call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) 738 ret void 739} 740 741define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind { 742; CHECK-LABEL: st4_8h 743; CHECK: st4.8h 744; EXYNOS-LABEL: st4_8h 745; EXYNOS: zip1.8h 746; EXYNOS: zip2.8h 747; EXYNOS: zip1.8h 748; EXYNOS: zip2.8h 749; EXYNOS: zip1.8h 750; EXYNOS: zip2.8h 751; EXYNOS: stp 752; EXYNOS: zip1.8h 753; EXYNOS: zip2.8h 754; EXYNOS: stp 755 call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) 756 ret void 757} 758 759declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly 760declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 761declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 762 763define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind { 764; CHECK-LABEL: st2_2s 765; CHECK: st2.2s 766; EXYNOS-LABEL: st2_2s 767; EXYNOS: zip1.2s 768; EXYNOS: zip2.2s 769; EXYNOS: stp 770 call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) 771 ret void 772} 773 774define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind { 775; CHECK-LABEL: st3_2s 776; CHECK: st3.2s 777 call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) 778 ret void 779} 780 781define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind { 782; CHECK-LABEL: st4_2s 783; CHECK: st4.2s 784; EXYNOS-LABEL: st4_2s 785; EXYNOS: zip1.2s 786; EXYNOS: zip2.2s 787; EXYNOS: zip1.2s 788; EXYNOS: zip2.2s 789; EXYNOS: zip1.2s 790; EXYNOS: zip2.2s 791; EXYNOS: stp 792; EXYNOS: zip1.2s 793; EXYNOS: zip2.2s 794; EXYNOS: stp 795 call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) 796 ret void 797} 798 799declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly 800declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 801declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 802 803define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind { 804; CHECK-LABEL: st2_4s 805; CHECK: st2.4s 806; EXYNOS-LABEL: st2_4s 807; EXYNOS: zip1.4s 808; EXYNOS: zip2.4s 809; EXYNOS: stp 810 call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) 811 ret void 812} 813 814define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind { 815; CHECK-LABEL: st3_4s 816; CHECK: st3.4s 817 call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) 818 ret void 819} 820 821define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind { 822; CHECK-LABEL: st4_4s 823; CHECK: st4.4s 824; EXYNOS-LABEL: st4_4s 825; EXYNOS: zip1.4s 826; EXYNOS: zip2.4s 827; EXYNOS: zip1.4s 828; EXYNOS: zip2.4s 829; EXYNOS: zip1.4s 830; EXYNOS: zip2.4s 831; EXYNOS: stp 832; EXYNOS: zip1.4s 833; EXYNOS: zip2.4s 834; EXYNOS: stp 835 call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) 836 ret void 837} 838 839declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly 840declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 841declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 842 843; If there's only one element, st2/3/4 don't make much sense, stick to st1. 844define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind { 845; CHECK-LABEL: st2_1d 846; CHECK: st1.1d 847 call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P) 848 ret void 849} 850 851define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind { 852; CHECK-LABEL: st3_1d 853; CHECK: st1.1d 854 call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) 855 ret void 856} 857 858define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind { 859; CHECK-LABEL: st4_1d 860; CHECK: st1.1d 861 call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) 862 ret void 863} 864 865declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly 866declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 867declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 868 869define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind { 870; CHECK-LABEL: st2_2d 871; CHECK: st2.2d 872; EXYNOS-LABEL: st2_2d 873; EXYNOS: zip1.2d 874; EXYNOS: zip2.2d 875; EXYNOS: stp 876 call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) 877 ret void 878} 879 880define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind { 881; CHECK-LABEL: st3_2d 882; CHECK: st3.2d 883 call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) 884 ret void 885} 886 887define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind { 888; CHECK-LABEL: st4_2d 889; CHECK: st4.2d 890; EXYNOS-LABEL: st4_2d 891; EXYNOS: zip1.2d 892; EXYNOS: zip2.2d 893; EXYNOS: zip1.2d 894; EXYNOS: zip2.2d 895; EXYNOS: zip1.2d 896; EXYNOS: zip2.2d 897; EXYNOS: stp 898; EXYNOS: zip1.2d 899; EXYNOS: zip2.2d 900; EXYNOS: stp 901 call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) 902 ret void 903} 904 905declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly 906declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 907declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 908 909declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly 910declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly 911declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly 912declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly 913declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly 914declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly 915 916define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) { 917; CHECK-LABEL: st1_x2_v8i8: 918; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 919 call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) 920 ret void 921} 922 923define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) { 924; CHECK-LABEL: st1_x2_v4i16: 925; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 926 call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) 927 ret void 928} 929 930define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) { 931; CHECK-LABEL: st1_x2_v2i32: 932; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 933 call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) 934 ret void 935} 936 937define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) { 938; CHECK-LABEL: st1_x2_v2f32: 939; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 940 call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr) 941 ret void 942} 943 944define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) { 945; CHECK-LABEL: st1_x2_v1i64: 946; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 947 call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) 948 ret void 949} 950 951define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) { 952; CHECK-LABEL: st1_x2_v1f64: 953; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 954 call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr) 955 ret void 956} 957 958declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly 959declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly 960declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly 961declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly 962declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly 963declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly 964 965define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) { 966; CHECK-LABEL: st1_x2_v16i8: 967; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 968 call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) 969 ret void 970} 971 972define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) { 973; CHECK-LABEL: st1_x2_v8i16: 974; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 975 call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) 976 ret void 977} 978 979define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) { 980; CHECK-LABEL: st1_x2_v4i32: 981; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 982 call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) 983 ret void 984} 985 986define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) { 987; CHECK-LABEL: st1_x2_v4f32: 988; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 989 call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr) 990 ret void 991} 992 993define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) { 994; CHECK-LABEL: st1_x2_v2i64: 995; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 996 call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) 997 ret void 998} 999 1000define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) { 1001; CHECK-LABEL: st1_x2_v2f64: 1002; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1003 call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr) 1004 ret void 1005} 1006 1007declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 1008declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 1009declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 1010declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly 1011declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 1012declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly 1013 1014define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) { 1015; CHECK-LABEL: st1_x3_v8i8: 1016; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1017 call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) 1018 ret void 1019} 1020 1021define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) { 1022; CHECK-LABEL: st1_x3_v4i16: 1023; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1024 call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) 1025 ret void 1026} 1027 1028define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) { 1029; CHECK-LABEL: st1_x3_v2i32: 1030; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1031 call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) 1032 ret void 1033} 1034 1035define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) { 1036; CHECK-LABEL: st1_x3_v2f32: 1037; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1038 call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) 1039 ret void 1040} 1041 1042define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) { 1043; CHECK-LABEL: st1_x3_v1i64: 1044; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1045 call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) 1046 ret void 1047} 1048 1049define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) { 1050; CHECK-LABEL: st1_x3_v1f64: 1051; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1052 call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) 1053 ret void 1054} 1055 1056declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 1057declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 1058declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 1059declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly 1060declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 1061declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly 1062 1063define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) { 1064; CHECK-LABEL: st1_x3_v16i8: 1065; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1066 call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) 1067 ret void 1068} 1069 1070define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) { 1071; CHECK-LABEL: st1_x3_v8i16: 1072; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1073 call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) 1074 ret void 1075} 1076 1077define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) { 1078; CHECK-LABEL: st1_x3_v4i32: 1079; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1080 call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) 1081 ret void 1082} 1083 1084define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) { 1085; CHECK-LABEL: st1_x3_v4f32: 1086; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1087 call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) 1088 ret void 1089} 1090 1091define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) { 1092; CHECK-LABEL: st1_x3_v2i64: 1093; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1094 call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) 1095 ret void 1096} 1097 1098define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) { 1099; CHECK-LABEL: st1_x3_v2f64: 1100; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1101 call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) 1102 ret void 1103} 1104 1105 1106declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 1107declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 1108declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 1109declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly 1110declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 1111declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly 1112 1113define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) { 1114; CHECK-LABEL: st1_x4_v8i8: 1115; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1116 call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) 1117 ret void 1118} 1119 1120define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) { 1121; CHECK-LABEL: st1_x4_v4i16: 1122; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1123 call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) 1124 ret void 1125} 1126 1127define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) { 1128; CHECK-LABEL: st1_x4_v2i32: 1129; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1130 call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) 1131 ret void 1132} 1133 1134define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) { 1135; CHECK-LABEL: st1_x4_v2f32: 1136; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1137 call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) 1138 ret void 1139} 1140 1141define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) { 1142; CHECK-LABEL: st1_x4_v1i64: 1143; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1144 call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) 1145 ret void 1146} 1147 1148define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) { 1149; CHECK-LABEL: st1_x4_v1f64: 1150; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1151 call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) 1152 ret void 1153} 1154 1155declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 1156declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 1157declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 1158declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly 1159declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 1160declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly 1161 1162define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) { 1163; CHECK-LABEL: st1_x4_v16i8: 1164; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1165 call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) 1166 ret void 1167} 1168 1169define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) { 1170; CHECK-LABEL: st1_x4_v8i16: 1171; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1172 call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) 1173 ret void 1174} 1175 1176define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) { 1177; CHECK-LABEL: st1_x4_v4i32: 1178; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1179 call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) 1180 ret void 1181} 1182 1183define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) { 1184; CHECK-LABEL: st1_x4_v4f32: 1185; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1186 call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) 1187 ret void 1188} 1189 1190define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) { 1191; CHECK-LABEL: st1_x4_v2i64: 1192; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1193 call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) 1194 ret void 1195} 1196 1197define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) { 1198; CHECK-LABEL: st1_x4_v2f64: 1199; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1200 call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) 1201 ret void 1202} 1203