;RUN: llc -mtriple=arm-eabi -mattr=+v7 -mattr=+neon %s -o - | FileCheck %s

; Under-aligned NEON vector copies: each function below loads one 64-bit or
; 128-bit vector through %in and stores it through %out with an explicit
; alignment of 1, 2 or 4 bytes. The FileCheck directives pin the expected
; instruction per case:
;   align 1 -> vld1.8  / vst1.8
;   align 2 -> vld1.16 / vst1.16
;   align 4 -> vldr / vstr for 64-bit vectors, vld1.32 / vst1.32 for 128-bit
; Functions are named v<SIZE>_<TYPE>_<ALIGN>. The zero-index getelementptr
; instructions leave the incoming pointers unchanged before the bitcast to
; the vector pointer type.

;ALIGN = 1
;SIZE = 64
;TYPE = <8 x i8>
; 1-byte-aligned copy of a <8 x i8>; expects byte-element vld1.8 / vst1.8.
define void @v64_v8i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <8 x i8>*
  %vo  = bitcast i8* %po to <8 x i8>*
;CHECK: vld1.8
  %v1 = load  <8 x i8>,  <8 x i8>* %vi, align 1
;CHECK: vst1.8
  store <8 x i8> %v1, <8 x i8>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 64
;TYPE = <4 x i16>
; 1-byte-aligned copy of a <4 x i16>; expects byte-element vld1.8 / vst1.8.
define void @v64_v4i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x i16>*
  %vo  = bitcast i8* %po to <4 x i16>*
;CHECK: vld1.8
  %v1 = load  <4 x i16>,  <4 x i16>* %vi, align 1
;CHECK: vst1.8
  store <4 x i16> %v1, <4 x i16>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 64
;TYPE = <2 x i32>
; 1-byte-aligned copy of a <2 x i32>; expects byte-element vld1.8 / vst1.8.
define void @v64_v2i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x i32>*
  %vo  = bitcast i8* %po to <2 x i32>*
;CHECK: vld1.8
  %v1 = load  <2 x i32>,  <2 x i32>* %vi, align 1
;CHECK: vst1.8
  store <2 x i32> %v1, <2 x i32>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 64
;TYPE = <2 x float>
; 1-byte-aligned copy of a <2 x float>; expects byte-element vld1.8 / vst1.8.
define void @v64_v2f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x float>*
  %vo  = bitcast i8* %po to <2 x float>*
;CHECK: vld1.8
  %v1 = load  <2 x float>,  <2 x float>* %vi, align 1
;CHECK: vst1.8
  store <2 x float> %v1, <2 x float>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 128
;TYPE = <16 x i8>
; 1-byte-aligned copy of a <16 x i8>; expects byte-element vld1.8 / vst1.8.
define void @v128_v16i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <16 x i8>*
  %vo  = bitcast i8* %po to <16 x i8>*
;CHECK: vld1.8
  %v1 = load  <16 x i8>,  <16 x i8>* %vi, align 1
;CHECK: vst1.8
  store <16 x i8> %v1, <16 x i8>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 128
;TYPE = <8 x i16>
; 1-byte-aligned copy of a <8 x i16>; expects byte-element vld1.8 / vst1.8.
define void @v128_v8i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <8 x i16>*
  %vo  = bitcast i8* %po to <8 x i16>*
;CHECK: vld1.8
  %v1 = load  <8 x i16>,  <8 x i16>* %vi, align 1
;CHECK: vst1.8
  store <8 x i16> %v1, <8 x i16>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 128
;TYPE = <4 x i32>
; 1-byte-aligned copy of a <4 x i32>; expects byte-element vld1.8 / vst1.8.
define void @v128_v4i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x i32>*
  %vo  = bitcast i8* %po to <4 x i32>*
;CHECK: vld1.8
  %v1 = load  <4 x i32>,  <4 x i32>* %vi, align 1
;CHECK: vst1.8
  store <4 x i32> %v1, <4 x i32>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 128
;TYPE = <2 x i64>
; 1-byte-aligned copy of a <2 x i64>; expects byte-element vld1.8 / vst1.8.
define void @v128_v2i64_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x i64>*
  %vo  = bitcast i8* %po to <2 x i64>*
;CHECK: vld1.8
  %v1 = load  <2 x i64>,  <2 x i64>* %vi, align 1
;CHECK: vst1.8
  store <2 x i64> %v1, <2 x i64>* %vo, align 1
  ret void
}


;ALIGN = 1
;SIZE = 128
;TYPE = <4 x float>
; 1-byte-aligned copy of a <4 x float>; expects byte-element vld1.8 / vst1.8.
define void @v128_v4f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_1:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x float>*
  %vo  = bitcast i8* %po to <4 x float>*
;CHECK: vld1.8
  %v1 = load  <4 x float>,  <4 x float>* %vi, align 1
;CHECK: vst1.8
  store <4 x float> %v1, <4 x float>* %vo, align 1
  ret void
}


;ALIGN = 2
;SIZE = 64
;TYPE = <8 x i8>
; 2-byte-aligned copy of a <8 x i8>; expects halfword-element vld1.16 / vst1.16.
define void @v64_v8i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <8 x i8>*
  %vo  = bitcast i8* %po to <8 x i8>*
;CHECK: vld1.16
  %v1 = load  <8 x i8>,  <8 x i8>* %vi, align 2
;CHECK: vst1.16
  store <8 x i8> %v1, <8 x i8>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 64
;TYPE = <4 x i16>
; 2-byte-aligned copy of a <4 x i16>; expects halfword-element vld1.16 / vst1.16.
define void @v64_v4i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x i16>*
  %vo  = bitcast i8* %po to <4 x i16>*
;CHECK: vld1.16
  %v1 = load  <4 x i16>,  <4 x i16>* %vi, align 2
;CHECK: vst1.16
  store <4 x i16> %v1, <4 x i16>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 64
;TYPE = <2 x i32>
; 2-byte-aligned copy of a <2 x i32>; expects halfword-element vld1.16 / vst1.16.
define void @v64_v2i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x i32>*
  %vo  = bitcast i8* %po to <2 x i32>*
;CHECK: vld1.16
  %v1 = load  <2 x i32>,  <2 x i32>* %vi, align 2
;CHECK: vst1.16
  store <2 x i32> %v1, <2 x i32>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 64
;TYPE = <2 x float>
; 2-byte-aligned copy of a <2 x float>; expects halfword-element vld1.16 / vst1.16.
define void @v64_v2f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x float>*
  %vo  = bitcast i8* %po to <2 x float>*
;CHECK: vld1.16
  %v1 = load  <2 x float>,  <2 x float>* %vi, align 2
;CHECK: vst1.16
  store <2 x float> %v1, <2 x float>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 128
;TYPE = <16 x i8>
; 2-byte-aligned copy of a <16 x i8>; expects halfword-element vld1.16 / vst1.16.
define void @v128_v16i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <16 x i8>*
  %vo  = bitcast i8* %po to <16 x i8>*
;CHECK: vld1.16
  %v1 = load  <16 x i8>,  <16 x i8>* %vi, align 2
;CHECK: vst1.16
  store <16 x i8> %v1, <16 x i8>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 128
;TYPE = <8 x i16>
; 2-byte-aligned copy of a <8 x i16>; expects halfword-element vld1.16 / vst1.16.
define void @v128_v8i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <8 x i16>*
  %vo  = bitcast i8* %po to <8 x i16>*
;CHECK: vld1.16
  %v1 = load  <8 x i16>,  <8 x i16>* %vi, align 2
;CHECK: vst1.16
  store <8 x i16> %v1, <8 x i16>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 128
;TYPE = <4 x i32>
; 2-byte-aligned copy of a <4 x i32>; expects halfword-element vld1.16 / vst1.16.
define void @v128_v4i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x i32>*
  %vo  = bitcast i8* %po to <4 x i32>*
;CHECK: vld1.16
  %v1 = load  <4 x i32>,  <4 x i32>* %vi, align 2
;CHECK: vst1.16
  store <4 x i32> %v1, <4 x i32>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 128
;TYPE = <2 x i64>
; 2-byte-aligned copy of a <2 x i64>; expects halfword-element vld1.16 / vst1.16.
define void @v128_v2i64_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x i64>*
  %vo  = bitcast i8* %po to <2 x i64>*
;CHECK: vld1.16
  %v1 = load  <2 x i64>,  <2 x i64>* %vi, align 2
;CHECK: vst1.16
  store <2 x i64> %v1, <2 x i64>* %vo, align 2
  ret void
}


;ALIGN = 2
;SIZE = 128
;TYPE = <4 x float>
; 2-byte-aligned copy of a <4 x float>; expects halfword-element vld1.16 / vst1.16.
define void @v128_v4f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_2:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x float>*
  %vo  = bitcast i8* %po to <4 x float>*
;CHECK: vld1.16
  %v1 = load  <4 x float>,  <4 x float>* %vi, align 2
;CHECK: vst1.16
  store <4 x float> %v1, <4 x float>* %vo, align 2
  ret void
}


;ALIGN = 4
;SIZE = 64
;TYPE = <8 x i8>
; 4-byte-aligned copy of a <8 x i8>; expects vldr / vstr rather than vld1 / vst1.
define void @v64_v8i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <8 x i8>*
  %vo  = bitcast i8* %po to <8 x i8>*
;CHECK: vldr
  %v1 = load  <8 x i8>,  <8 x i8>* %vi, align 4
;CHECK: vstr
  store <8 x i8> %v1, <8 x i8>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 64
;TYPE = <4 x i16>
; 4-byte-aligned copy of a <4 x i16>; expects vldr / vstr rather than vld1 / vst1.
define void @v64_v4i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x i16>*
  %vo  = bitcast i8* %po to <4 x i16>*
;CHECK: vldr
  %v1 = load  <4 x i16>,  <4 x i16>* %vi, align 4
;CHECK: vstr
  store <4 x i16> %v1, <4 x i16>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 64
;TYPE = <2 x i32>
; 4-byte-aligned copy of a <2 x i32>; expects vldr / vstr rather than vld1 / vst1.
define void @v64_v2i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x i32>*
  %vo  = bitcast i8* %po to <2 x i32>*
;CHECK: vldr
  %v1 = load  <2 x i32>,  <2 x i32>* %vi, align 4
;CHECK: vstr
  store <2 x i32> %v1, <2 x i32>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 64
;TYPE = <2 x float>
; 4-byte-aligned copy of a <2 x float>; expects vldr / vstr rather than vld1 / vst1.
define void @v64_v2f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x float>*
  %vo  = bitcast i8* %po to <2 x float>*
;CHECK: vldr
  %v1 = load  <2 x float>,  <2 x float>* %vi, align 4
;CHECK: vstr
  store <2 x float> %v1, <2 x float>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 128
;TYPE = <16 x i8>
; 4-byte-aligned copy of a <16 x i8>; expects word-element vld1.32 / vst1.32.
define void @v128_v16i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <16 x i8>*
  %vo  = bitcast i8* %po to <16 x i8>*
;CHECK: vld1.32
  %v1 = load  <16 x i8>,  <16 x i8>* %vi, align 4
;CHECK: vst1.32
  store <16 x i8> %v1, <16 x i8>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 128
;TYPE = <8 x i16>
; 4-byte-aligned copy of a <8 x i16>; expects word-element vld1.32 / vst1.32.
define void @v128_v8i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <8 x i16>*
  %vo  = bitcast i8* %po to <8 x i16>*
;CHECK: vld1.32
  %v1 = load  <8 x i16>,  <8 x i16>* %vi, align 4
;CHECK: vst1.32
  store <8 x i16> %v1, <8 x i16>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 128
;TYPE = <4 x i32>
; 4-byte-aligned copy of a <4 x i32>; expects word-element vld1.32 / vst1.32.
define void @v128_v4i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x i32>*
  %vo  = bitcast i8* %po to <4 x i32>*
;CHECK: vld1.32
  %v1 = load  <4 x i32>,  <4 x i32>* %vi, align 4
;CHECK: vst1.32
  store <4 x i32> %v1, <4 x i32>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 128
;TYPE = <2 x i64>
; 4-byte-aligned copy of a <2 x i64>; expects word-element vld1.32 / vst1.32.
define void @v128_v2i64_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <2 x i64>*
  %vo  = bitcast i8* %po to <2 x i64>*
;CHECK: vld1.32
  %v1 = load  <2 x i64>,  <2 x i64>* %vi, align 4
;CHECK: vst1.32
  store <2 x i64> %v1, <2 x i64>* %vo, align 4
  ret void
}


;ALIGN = 4
;SIZE = 128
;TYPE = <4 x float>
; 4-byte-aligned copy of a <4 x float>; expects word-element vld1.32 / vst1.32.
define void @v128_v4f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_4:
entry:
  %po = getelementptr i8, i8* %out, i32 0
  %pi = getelementptr i8, i8* %in, i32 0
  %vi  = bitcast i8* %pi to <4 x float>*
  %vo  = bitcast i8* %po to <4 x float>*
;CHECK: vld1.32
  %v1 = load  <4 x float>,  <4 x float>* %vi, align 4
;CHECK: vst1.32
  store <4 x float> %v1, <4 x float>* %vo, align 4
  ret void
}
