; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

; Post-increment addressing for MVE vector loads and stores.
;
; Each function accesses memory through an incoming pointer and returns that
; pointer advanced by a constant byte offset. The assertions record whether
; the pointer update is folded into a post-indexed vldr/vstr or emitted as a
; separate add/sub. The numeric suffix of a function name is the byte offset
; ("m" marks a negative offset). Assertions are shared by the little- and
; big-endian runs unless the expected output differs, in which case the -LE
; and -BE prefixes are used.

; <4 x i32> loads, align 4. Offsets 4, -4, 508 and -508 are expected to fold
; into a post-indexed vldrw.u32; 512 and -512 use a separate add.w/sub.w. For
; offset 3 the little-endian run expects a post-indexed vldrb.u8, while the
; big-endian run expects vldrw.u32 followed by a separate adds.
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0], #4
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_3(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrwu32_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrwu32_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_m4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0], #-4
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -4
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0], #508
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 508
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_512(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add.w r0, r0, #512
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 512
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_m508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0], #-508
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -508
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_m512(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    sub.w r0, r0, #512
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -512
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}


; <4 x i16> loads zero-extended to <4 x i32> (vldrh.u32). Offsets 4, 2, 254
; and -254 are expected to fold; 3, 256 and -256 use a separate add/sub.
define i8* @ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0], #4
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    adds r0, #3
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0], #2
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0], #254
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0], #-254
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <4 x i16> loads sign-extended to <4 x i32> (vldrh.s32). Same offsets and
; same folding expectations as the zero-extending variants.
define i8* @ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0], #4
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    adds r0, #3
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0], #2
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0], #254
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0], #-254
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <8 x i16> loads, align 2 (vldrh.u16). Offsets 4, 2, 254 and -254 are
; expected to fold; 256 and -256 use a separate add.w/sub.w. For offset 3 the
; little-endian run expects a post-indexed vldrb.u8, while the big-endian run
; expects vldrh.u16 followed by a separate adds.
define i8* @ldrhu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0], #4
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_3(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrhu16_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrhu16_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0], #2
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0], #254
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0], #-254
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}


; <4 x i8> loads zero-extended to <4 x i32> (vldrb.u32). Offsets 4, 3, 127
; and -127 are expected to fold; 128 and -128 use a separate adds/subs.
define i8* @ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0], #4
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0], #3
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0], #127
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0]
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0], #-127
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0]
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <4 x i8> loads sign-extended to <4 x i32> (vldrb.s32). Same offsets and
; same folding expectations as the zero-extending variants.
define i8* @ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0], #4
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0], #3
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0], #127
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0]
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0], #-127
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0]
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <8 x i8> loads zero-extended to <8 x i16> (vldrb.u16). Offsets 4, 3, 127
; and -127 are expected to fold; 128 and -128 use a separate adds/subs.
define i8* @ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0], #4
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0], #3
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0], #127
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0], #-127
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}


; <8 x i8> loads sign-extended to <8 x i16> (vldrb.s16). Same offsets and
; same folding expectations as the zero-extending variants.
define i8* @ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0], #4
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0], #3
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0], #127
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0], #-127
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}


; <16 x i8> loads, align 1 (vldrb.u8). Offsets 4, 3, 127 and -127 are expected
; to fold; 128 and -128 use a separate adds/subs.
define i8* @ldrbu8_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0], #4
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0], #127
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0], #-127
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}


; Floating-point vectors (<4 x float>, <8 x half>) with a folded offset of 4.
; The expected output uses the same vldrw.u32/vldrh.u16 instructions as the
; integer vectors of the same element width.
define i8* @ldrwf32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0], #4
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %z
}

define i8* @ldrwf16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0], #4
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %z
}

; Under-aligned (align 1) loads with an offset of 3. Full-width vectors are
; expected to use a post-indexed vldrb.u8, followed by a vrev32.8/vrev16.8 in
; the big-endian run.
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrwi32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrwi32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 1
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrhi16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrhi16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrhi16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 1
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

; Under-aligned sign-extending <4 x i16> load: the expected output copies the
; eight bytes to the stack with scalar ldr/strd and then performs vldrh.s32
; from the stack slot; the pointer update is a separate adds.
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    ldr r3, [r0, #4]
; CHECK-NEXT:    ldr r2, [r0]
; CHECK-NEXT:    adds r0, #3
; CHECK-NEXT:    strd r2, r3, [sp]
; CHECK-NEXT:    mov r2, sp
; CHECK-NEXT:    vldrh.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 1
  %2 = bitcast i8* %y to <4 x i32>*
  %3 = sext <4 x i16> %1 to <4 x i32>
  store <4 x i32> %3, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrf32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 1
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %z
}

define i8* @ldrf16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %x to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 1
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %z
}

; Over-aligned (align 8) <8 x i16> load with an offset of 4: the little-endian
; run expects vldrw.u32 and the big-endian run vldrh.u16, both post-indexed.
define i8* @ldrh16_align8(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrh16_align8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0], #4
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrh16_align8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r0], #4
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 8
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}





; Stores. The argument order is swapped relative to the load tests: the vector
; is loaded from %x (r1) and stored through %y (r0), and the returned pointer
; is %y advanced by the offset, so the post-increment candidate is the store.
define i8* @strw32_4(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strw32_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0], #4
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @strw32_3(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strw32_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2
= bitcast i8* %y to <4 x i32>* 1162 store <4 x i32> %1, <4 x i32>* %2, align 4 1163 ret i8* %z 1164} 1165 1166define i8* @strw32_m4(i8* %y, i8* %x) { 1167; CHECK-LE-LABEL: strw32_m4: 1168; CHECK-LE: @ %bb.0: @ %entry 1169; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1170; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4 1171; CHECK-LE-NEXT: bx lr 1172; 1173; CHECK-BE-LABEL: strw32_m4: 1174; CHECK-BE: @ %bb.0: @ %entry 1175; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1176; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4 1177; CHECK-BE-NEXT: bx lr 1178entry: 1179 %z = getelementptr inbounds i8, i8* %y, i32 -4 1180 %0 = bitcast i8* %x to <4 x i32>* 1181 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1182 %2 = bitcast i8* %y to <4 x i32>* 1183 store <4 x i32> %1, <4 x i32>* %2, align 4 1184 ret i8* %z 1185} 1186 1187define i8* @strw32_508(i8* %y, i8* %x) { 1188; CHECK-LABEL: strw32_508: 1189; CHECK: @ %bb.0: @ %entry 1190; CHECK-NEXT: vldrw.u32 q0, [r1] 1191; CHECK-NEXT: vstrw.32 q0, [r0], #508 1192; CHECK-NEXT: bx lr 1193entry: 1194 %z = getelementptr inbounds i8, i8* %y, i32 508 1195 %0 = bitcast i8* %x to <4 x i32>* 1196 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1197 %2 = bitcast i8* %y to <4 x i32>* 1198 store <4 x i32> %1, <4 x i32>* %2, align 4 1199 ret i8* %z 1200} 1201 1202define i8* @strw32_512(i8* %y, i8* %x) { 1203; CHECK-LABEL: strw32_512: 1204; CHECK: @ %bb.0: @ %entry 1205; CHECK-NEXT: vldrw.u32 q0, [r1] 1206; CHECK-NEXT: vstrw.32 q0, [r0] 1207; CHECK-NEXT: add.w r0, r0, #512 1208; CHECK-NEXT: bx lr 1209entry: 1210 %z = getelementptr inbounds i8, i8* %y, i32 512 1211 %0 = bitcast i8* %x to <4 x i32>* 1212 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1213 %2 = bitcast i8* %y to <4 x i32>* 1214 store <4 x i32> %1, <4 x i32>* %2, align 4 1215 ret i8* %z 1216} 1217 1218define i8* @strw32_m508(i8* %y, i8* %x) { 1219; CHECK-LABEL: strw32_m508: 1220; CHECK: @ %bb.0: @ %entry 1221; CHECK-NEXT: vldrw.u32 q0, [r1] 1222; CHECK-NEXT: vstrw.32 q0, [r0], #-508 1223; CHECK-NEXT: bx lr 1224entry: 1225 %z = 
getelementptr inbounds i8, i8* %y, i32 -508 1226 %0 = bitcast i8* %x to <4 x i32>* 1227 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1228 %2 = bitcast i8* %y to <4 x i32>* 1229 store <4 x i32> %1, <4 x i32>* %2, align 4 1230 ret i8* %z 1231} 1232 1233define i8* @strw32_m512(i8* %y, i8* %x) { 1234; CHECK-LABEL: strw32_m512: 1235; CHECK: @ %bb.0: @ %entry 1236; CHECK-NEXT: vldrw.u32 q0, [r1] 1237; CHECK-NEXT: vstrw.32 q0, [r0] 1238; CHECK-NEXT: sub.w r0, r0, #512 1239; CHECK-NEXT: bx lr 1240entry: 1241 %z = getelementptr inbounds i8, i8* %y, i32 -512 1242 %0 = bitcast i8* %x to <4 x i32>* 1243 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1244 %2 = bitcast i8* %y to <4 x i32>* 1245 store <4 x i32> %1, <4 x i32>* %2, align 4 1246 ret i8* %z 1247} 1248 1249 1250define i8* @strh32_4(i8* %y, i8* %x) { 1251; CHECK-LABEL: strh32_4: 1252; CHECK: @ %bb.0: @ %entry 1253; CHECK-NEXT: vldrh.u32 q0, [r1] 1254; CHECK-NEXT: vstrh.32 q0, [r0], #4 1255; CHECK-NEXT: bx lr 1256entry: 1257 %z = getelementptr inbounds i8, i8* %y, i32 4 1258 %0 = bitcast i8* %x to <4 x i16>* 1259 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1260 %2 = bitcast i8* %y to <4 x i16>* 1261 store <4 x i16> %1, <4 x i16>* %2, align 2 1262 ret i8* %z 1263} 1264 1265define i8* @strh32_3(i8* %y, i8* %x) { 1266; CHECK-LABEL: strh32_3: 1267; CHECK: @ %bb.0: @ %entry 1268; CHECK-NEXT: vldrh.u32 q0, [r1] 1269; CHECK-NEXT: vstrh.32 q0, [r0] 1270; CHECK-NEXT: adds r0, #3 1271; CHECK-NEXT: bx lr 1272entry: 1273 %z = getelementptr inbounds i8, i8* %y, i32 3 1274 %0 = bitcast i8* %x to <4 x i16>* 1275 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1276 %2 = bitcast i8* %y to <4 x i16>* 1277 store <4 x i16> %1, <4 x i16>* %2, align 2 1278 ret i8* %z 1279} 1280 1281define i8* @strh32_2(i8* %y, i8* %x) { 1282; CHECK-LABEL: strh32_2: 1283; CHECK: @ %bb.0: @ %entry 1284; CHECK-NEXT: vldrh.u32 q0, [r1] 1285; CHECK-NEXT: vstrh.32 q0, [r0], #2 1286; CHECK-NEXT: bx lr 1287entry: 1288 %z = getelementptr inbounds i8, i8* %y, i32 2 1289 %0 = 
bitcast i8* %x to <4 x i16>* 1290 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1291 %2 = bitcast i8* %y to <4 x i16>* 1292 store <4 x i16> %1, <4 x i16>* %2, align 2 1293 ret i8* %z 1294} 1295 1296define i8* @strh32_254(i8* %y, i8* %x) { 1297; CHECK-LABEL: strh32_254: 1298; CHECK: @ %bb.0: @ %entry 1299; CHECK-NEXT: vldrh.u32 q0, [r1] 1300; CHECK-NEXT: vstrh.32 q0, [r0], #254 1301; CHECK-NEXT: bx lr 1302entry: 1303 %z = getelementptr inbounds i8, i8* %y, i32 254 1304 %0 = bitcast i8* %x to <4 x i16>* 1305 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1306 %2 = bitcast i8* %y to <4 x i16>* 1307 store <4 x i16> %1, <4 x i16>* %2, align 2 1308 ret i8* %z 1309} 1310 1311define i8* @strh32_256(i8* %y, i8* %x) { 1312; CHECK-LABEL: strh32_256: 1313; CHECK: @ %bb.0: @ %entry 1314; CHECK-NEXT: vldrh.u32 q0, [r1] 1315; CHECK-NEXT: vstrh.32 q0, [r0] 1316; CHECK-NEXT: add.w r0, r0, #256 1317; CHECK-NEXT: bx lr 1318entry: 1319 %z = getelementptr inbounds i8, i8* %y, i32 256 1320 %0 = bitcast i8* %x to <4 x i16>* 1321 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1322 %2 = bitcast i8* %y to <4 x i16>* 1323 store <4 x i16> %1, <4 x i16>* %2, align 2 1324 ret i8* %z 1325} 1326 1327define i8* @strh32_m254(i8* %y, i8* %x) { 1328; CHECK-LABEL: strh32_m254: 1329; CHECK: @ %bb.0: @ %entry 1330; CHECK-NEXT: vldrh.u32 q0, [r1] 1331; CHECK-NEXT: vstrh.32 q0, [r0], #-254 1332; CHECK-NEXT: bx lr 1333entry: 1334 %z = getelementptr inbounds i8, i8* %y, i32 -254 1335 %0 = bitcast i8* %x to <4 x i16>* 1336 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1337 %2 = bitcast i8* %y to <4 x i16>* 1338 store <4 x i16> %1, <4 x i16>* %2, align 2 1339 ret i8* %z 1340} 1341 1342define i8* @strh32_m256(i8* %y, i8* %x) { 1343; CHECK-LABEL: strh32_m256: 1344; CHECK: @ %bb.0: @ %entry 1345; CHECK-NEXT: vldrh.u32 q0, [r1] 1346; CHECK-NEXT: vstrh.32 q0, [r0] 1347; CHECK-NEXT: sub.w r0, r0, #256 1348; CHECK-NEXT: bx lr 1349entry: 1350 %z = getelementptr inbounds i8, i8* %y, i32 -256 1351 %0 = bitcast i8* %x to <4 x 
i16>* 1352 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1353 %2 = bitcast i8* %y to <4 x i16>* 1354 store <4 x i16> %1, <4 x i16>* %2, align 2 1355 ret i8* %z 1356} 1357 1358 1359define i8* @strh16_4(i8* %y, i8* %x) { 1360; CHECK-LE-LABEL: strh16_4: 1361; CHECK-LE: @ %bb.0: @ %entry 1362; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1363; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 1364; CHECK-LE-NEXT: bx lr 1365; 1366; CHECK-BE-LABEL: strh16_4: 1367; CHECK-BE: @ %bb.0: @ %entry 1368; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1369; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 1370; CHECK-BE-NEXT: bx lr 1371entry: 1372 %z = getelementptr inbounds i8, i8* %y, i32 4 1373 %0 = bitcast i8* %x to <8 x i16>* 1374 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1375 %2 = bitcast i8* %y to <8 x i16>* 1376 store <8 x i16> %1, <8 x i16>* %2, align 2 1377 ret i8* %z 1378} 1379 1380define i8* @strh16_3(i8* %y, i8* %x) { 1381; CHECK-LE-LABEL: strh16_3: 1382; CHECK-LE: @ %bb.0: @ %entry 1383; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1384; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 1385; CHECK-LE-NEXT: bx lr 1386; 1387; CHECK-BE-LABEL: strh16_3: 1388; CHECK-BE: @ %bb.0: @ %entry 1389; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1390; CHECK-BE-NEXT: vstrh.16 q0, [r0] 1391; CHECK-BE-NEXT: adds r0, #3 1392; CHECK-BE-NEXT: bx lr 1393entry: 1394 %z = getelementptr inbounds i8, i8* %y, i32 3 1395 %0 = bitcast i8* %x to <8 x i16>* 1396 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1397 %2 = bitcast i8* %y to <8 x i16>* 1398 store <8 x i16> %1, <8 x i16>* %2, align 2 1399 ret i8* %z 1400} 1401 1402define i8* @strh16_2(i8* %y, i8* %x) { 1403; CHECK-LE-LABEL: strh16_2: 1404; CHECK-LE: @ %bb.0: @ %entry 1405; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1406; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2 1407; CHECK-LE-NEXT: bx lr 1408; 1409; CHECK-BE-LABEL: strh16_2: 1410; CHECK-BE: @ %bb.0: @ %entry 1411; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1412; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2 1413; CHECK-BE-NEXT: bx lr 1414entry: 1415 %z = getelementptr inbounds i8, i8* %y, i32 2 1416 
%0 = bitcast i8* %x to <8 x i16>* 1417 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1418 %2 = bitcast i8* %y to <8 x i16>* 1419 store <8 x i16> %1, <8 x i16>* %2, align 2 1420 ret i8* %z 1421} 1422 1423define i8* @strh16_254(i8* %y, i8* %x) { 1424; CHECK-LABEL: strh16_254: 1425; CHECK: @ %bb.0: @ %entry 1426; CHECK-NEXT: vldrh.u16 q0, [r1] 1427; CHECK-NEXT: vstrh.16 q0, [r0], #254 1428; CHECK-NEXT: bx lr 1429entry: 1430 %z = getelementptr inbounds i8, i8* %y, i32 254 1431 %0 = bitcast i8* %x to <8 x i16>* 1432 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1433 %2 = bitcast i8* %y to <8 x i16>* 1434 store <8 x i16> %1, <8 x i16>* %2, align 2 1435 ret i8* %z 1436} 1437 1438define i8* @strh16_256(i8* %y, i8* %x) { 1439; CHECK-LABEL: strh16_256: 1440; CHECK: @ %bb.0: @ %entry 1441; CHECK-NEXT: vldrh.u16 q0, [r1] 1442; CHECK-NEXT: vstrh.16 q0, [r0] 1443; CHECK-NEXT: add.w r0, r0, #256 1444; CHECK-NEXT: bx lr 1445entry: 1446 %z = getelementptr inbounds i8, i8* %y, i32 256 1447 %0 = bitcast i8* %x to <8 x i16>* 1448 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1449 %2 = bitcast i8* %y to <8 x i16>* 1450 store <8 x i16> %1, <8 x i16>* %2, align 2 1451 ret i8* %z 1452} 1453 1454define i8* @strh16_m254(i8* %y, i8* %x) { 1455; CHECK-LABEL: strh16_m254: 1456; CHECK: @ %bb.0: @ %entry 1457; CHECK-NEXT: vldrh.u16 q0, [r1] 1458; CHECK-NEXT: vstrh.16 q0, [r0], #-254 1459; CHECK-NEXT: bx lr 1460entry: 1461 %z = getelementptr inbounds i8, i8* %y, i32 -254 1462 %0 = bitcast i8* %x to <8 x i16>* 1463 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1464 %2 = bitcast i8* %y to <8 x i16>* 1465 store <8 x i16> %1, <8 x i16>* %2, align 2 1466 ret i8* %z 1467} 1468 1469define i8* @strh16_m256(i8* %y, i8* %x) { 1470; CHECK-LABEL: strh16_m256: 1471; CHECK: @ %bb.0: @ %entry 1472; CHECK-NEXT: vldrh.u16 q0, [r1] 1473; CHECK-NEXT: vstrh.16 q0, [r0] 1474; CHECK-NEXT: sub.w r0, r0, #256 1475; CHECK-NEXT: bx lr 1476entry: 1477 %z = getelementptr inbounds i8, i8* %y, i32 -256 1478 %0 = bitcast i8* %x to <8 
x i16>* 1479 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1480 %2 = bitcast i8* %y to <8 x i16>* 1481 store <8 x i16> %1, <8 x i16>* %2, align 2 1482 ret i8* %z 1483} 1484 1485 1486define i8* @strb32_4(i8* %y, i8* %x) { 1487; CHECK-LABEL: strb32_4: 1488; CHECK: @ %bb.0: @ %entry 1489; CHECK-NEXT: vldrb.u32 q0, [r1] 1490; CHECK-NEXT: vstrb.32 q0, [r0], #4 1491; CHECK-NEXT: bx lr 1492entry: 1493 %z = getelementptr inbounds i8, i8* %y, i32 4 1494 %0 = bitcast i8* %x to <4 x i8>* 1495 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1496 %2 = bitcast i8* %y to <4 x i8>* 1497 store <4 x i8> %1, <4 x i8>* %2, align 1 1498 ret i8* %z 1499} 1500 1501define i8* @strb32_3(i8* %y, i8* %x) { 1502; CHECK-LABEL: strb32_3: 1503; CHECK: @ %bb.0: @ %entry 1504; CHECK-NEXT: vldrb.u32 q0, [r1] 1505; CHECK-NEXT: vstrb.32 q0, [r0], #3 1506; CHECK-NEXT: bx lr 1507entry: 1508 %z = getelementptr inbounds i8, i8* %y, i32 3 1509 %0 = bitcast i8* %x to <4 x i8>* 1510 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1511 %2 = bitcast i8* %y to <4 x i8>* 1512 store <4 x i8> %1, <4 x i8>* %2, align 1 1513 ret i8* %z 1514} 1515 1516define i8* @strb32_127(i8* %y, i8* %x) { 1517; CHECK-LABEL: strb32_127: 1518; CHECK: @ %bb.0: @ %entry 1519; CHECK-NEXT: vldrb.u32 q0, [r1] 1520; CHECK-NEXT: vstrb.32 q0, [r0], #127 1521; CHECK-NEXT: bx lr 1522entry: 1523 %z = getelementptr inbounds i8, i8* %y, i32 127 1524 %0 = bitcast i8* %x to <4 x i8>* 1525 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1526 %2 = bitcast i8* %y to <4 x i8>* 1527 store <4 x i8> %1, <4 x i8>* %2, align 1 1528 ret i8* %z 1529} 1530 1531define i8* @strb32_128(i8* %y, i8* %x) { 1532; CHECK-LABEL: strb32_128: 1533; CHECK: @ %bb.0: @ %entry 1534; CHECK-NEXT: vldrb.u32 q0, [r1] 1535; CHECK-NEXT: vstrb.32 q0, [r0] 1536; CHECK-NEXT: adds r0, #128 1537; CHECK-NEXT: bx lr 1538entry: 1539 %z = getelementptr inbounds i8, i8* %y, i32 128 1540 %0 = bitcast i8* %x to <4 x i8>* 1541 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1542 %2 = bitcast i8* %y to <4 x i8>* 1543 
store <4 x i8> %1, <4 x i8>* %2, align 1 1544 ret i8* %z 1545} 1546 1547define i8* @strb32_m127(i8* %y, i8* %x) { 1548; CHECK-LABEL: strb32_m127: 1549; CHECK: @ %bb.0: @ %entry 1550; CHECK-NEXT: vldrb.u32 q0, [r1] 1551; CHECK-NEXT: vstrb.32 q0, [r0], #-127 1552; CHECK-NEXT: bx lr 1553entry: 1554 %z = getelementptr inbounds i8, i8* %y, i32 -127 1555 %0 = bitcast i8* %x to <4 x i8>* 1556 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1557 %2 = bitcast i8* %y to <4 x i8>* 1558 store <4 x i8> %1, <4 x i8>* %2, align 1 1559 ret i8* %z 1560} 1561 1562define i8* @strb32_m128(i8* %y, i8* %x) { 1563; CHECK-LABEL: strb32_m128: 1564; CHECK: @ %bb.0: @ %entry 1565; CHECK-NEXT: vldrb.u32 q0, [r1] 1566; CHECK-NEXT: vstrb.32 q0, [r0] 1567; CHECK-NEXT: subs r0, #128 1568; CHECK-NEXT: bx lr 1569entry: 1570 %z = getelementptr inbounds i8, i8* %y, i32 -128 1571 %0 = bitcast i8* %x to <4 x i8>* 1572 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1573 %2 = bitcast i8* %y to <4 x i8>* 1574 store <4 x i8> %1, <4 x i8>* %2, align 1 1575 ret i8* %z 1576} 1577 1578 1579define i8* @strb16_4(i8* %y, i8* %x) { 1580; CHECK-LABEL: strb16_4: 1581; CHECK: @ %bb.0: @ %entry 1582; CHECK-NEXT: vldrb.u16 q0, [r1] 1583; CHECK-NEXT: vstrb.16 q0, [r0], #4 1584; CHECK-NEXT: bx lr 1585entry: 1586 %z = getelementptr inbounds i8, i8* %y, i32 4 1587 %0 = bitcast i8* %x to <8 x i8>* 1588 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1589 %2 = bitcast i8* %y to <8 x i8>* 1590 store <8 x i8> %1, <8 x i8>* %2, align 1 1591 ret i8* %z 1592} 1593 1594define i8* @strb16_3(i8* %y, i8* %x) { 1595; CHECK-LABEL: strb16_3: 1596; CHECK: @ %bb.0: @ %entry 1597; CHECK-NEXT: vldrb.u16 q0, [r1] 1598; CHECK-NEXT: vstrb.16 q0, [r0], #3 1599; CHECK-NEXT: bx lr 1600entry: 1601 %z = getelementptr inbounds i8, i8* %y, i32 3 1602 %0 = bitcast i8* %x to <8 x i8>* 1603 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1604 %2 = bitcast i8* %y to <8 x i8>* 1605 store <8 x i8> %1, <8 x i8>* %2, align 1 1606 ret i8* %z 1607} 1608 1609define i8* 
@strb16_127(i8* %y, i8* %x) { 1610; CHECK-LABEL: strb16_127: 1611; CHECK: @ %bb.0: @ %entry 1612; CHECK-NEXT: vldrb.u16 q0, [r1] 1613; CHECK-NEXT: vstrb.16 q0, [r0], #127 1614; CHECK-NEXT: bx lr 1615entry: 1616 %z = getelementptr inbounds i8, i8* %y, i32 127 1617 %0 = bitcast i8* %x to <8 x i8>* 1618 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1619 %2 = bitcast i8* %y to <8 x i8>* 1620 store <8 x i8> %1, <8 x i8>* %2, align 1 1621 ret i8* %z 1622} 1623 1624define i8* @strb16_128(i8* %y, i8* %x) { 1625; CHECK-LABEL: strb16_128: 1626; CHECK: @ %bb.0: @ %entry 1627; CHECK-NEXT: vldrb.u16 q0, [r1] 1628; CHECK-NEXT: vstrb.16 q0, [r0] 1629; CHECK-NEXT: adds r0, #128 1630; CHECK-NEXT: bx lr 1631entry: 1632 %z = getelementptr inbounds i8, i8* %y, i32 128 1633 %0 = bitcast i8* %x to <8 x i8>* 1634 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1635 %2 = bitcast i8* %y to <8 x i8>* 1636 store <8 x i8> %1, <8 x i8>* %2, align 1 1637 ret i8* %z 1638} 1639 1640define i8* @strb16_m127(i8* %y, i8* %x) { 1641; CHECK-LABEL: strb16_m127: 1642; CHECK: @ %bb.0: @ %entry 1643; CHECK-NEXT: vldrb.u16 q0, [r1] 1644; CHECK-NEXT: vstrb.16 q0, [r0], #-127 1645; CHECK-NEXT: bx lr 1646entry: 1647 %z = getelementptr inbounds i8, i8* %y, i32 -127 1648 %0 = bitcast i8* %x to <8 x i8>* 1649 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1650 %2 = bitcast i8* %y to <8 x i8>* 1651 store <8 x i8> %1, <8 x i8>* %2, align 1 1652 ret i8* %z 1653} 1654 1655define i8* @strb16_m128(i8* %y, i8* %x) { 1656; CHECK-LABEL: strb16_m128: 1657; CHECK: @ %bb.0: @ %entry 1658; CHECK-NEXT: vldrb.u16 q0, [r1] 1659; CHECK-NEXT: vstrb.16 q0, [r0] 1660; CHECK-NEXT: subs r0, #128 1661; CHECK-NEXT: bx lr 1662entry: 1663 %z = getelementptr inbounds i8, i8* %y, i32 -128 1664 %0 = bitcast i8* %x to <8 x i8>* 1665 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1666 %2 = bitcast i8* %y to <8 x i8>* 1667 store <8 x i8> %1, <8 x i8>* %2, align 1 1668 ret i8* %z 1669} 1670 1671 1672define i8* @strb8_4(i8* %y, i8* %x) { 1673; CHECK-LABEL: strb8_4: 
1674; CHECK: @ %bb.0: @ %entry 1675; CHECK-NEXT: vldrb.u8 q0, [r1] 1676; CHECK-NEXT: vstrb.8 q0, [r0], #4 1677; CHECK-NEXT: bx lr 1678entry: 1679 %z = getelementptr inbounds i8, i8* %y, i32 4 1680 %0 = bitcast i8* %x to <16 x i8>* 1681 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1682 %2 = bitcast i8* %y to <16 x i8>* 1683 store <16 x i8> %1, <16 x i8>* %2, align 1 1684 ret i8* %z 1685} 1686 1687define i8* @strb8_3(i8* %y, i8* %x) { 1688; CHECK-LABEL: strb8_3: 1689; CHECK: @ %bb.0: @ %entry 1690; CHECK-NEXT: vldrb.u8 q0, [r1] 1691; CHECK-NEXT: vstrb.8 q0, [r0], #3 1692; CHECK-NEXT: bx lr 1693entry: 1694 %z = getelementptr inbounds i8, i8* %y, i32 3 1695 %0 = bitcast i8* %x to <16 x i8>* 1696 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1697 %2 = bitcast i8* %y to <16 x i8>* 1698 store <16 x i8> %1, <16 x i8>* %2, align 1 1699 ret i8* %z 1700} 1701 1702define i8* @strb8_127(i8* %y, i8* %x) { 1703; CHECK-LABEL: strb8_127: 1704; CHECK: @ %bb.0: @ %entry 1705; CHECK-NEXT: vldrb.u8 q0, [r1] 1706; CHECK-NEXT: vstrb.8 q0, [r0], #127 1707; CHECK-NEXT: bx lr 1708entry: 1709 %z = getelementptr inbounds i8, i8* %y, i32 127 1710 %0 = bitcast i8* %x to <16 x i8>* 1711 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1712 %2 = bitcast i8* %y to <16 x i8>* 1713 store <16 x i8> %1, <16 x i8>* %2, align 1 1714 ret i8* %z 1715} 1716 1717define i8* @strb8_128(i8* %y, i8* %x) { 1718; CHECK-LABEL: strb8_128: 1719; CHECK: @ %bb.0: @ %entry 1720; CHECK-NEXT: vldrb.u8 q0, [r1] 1721; CHECK-NEXT: vstrb.8 q0, [r0] 1722; CHECK-NEXT: adds r0, #128 1723; CHECK-NEXT: bx lr 1724entry: 1725 %z = getelementptr inbounds i8, i8* %y, i32 128 1726 %0 = bitcast i8* %x to <16 x i8>* 1727 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1728 %2 = bitcast i8* %y to <16 x i8>* 1729 store <16 x i8> %1, <16 x i8>* %2, align 1 1730 ret i8* %z 1731} 1732 1733define i8* @strb8_m127(i8* %y, i8* %x) { 1734; CHECK-LABEL: strb8_m127: 1735; CHECK: @ %bb.0: @ %entry 1736; CHECK-NEXT: vldrb.u8 q0, [r1] 1737; CHECK-NEXT: vstrb.8 
q0, [r0], #-127 1738; CHECK-NEXT: bx lr 1739entry: 1740 %z = getelementptr inbounds i8, i8* %y, i32 -127 1741 %0 = bitcast i8* %x to <16 x i8>* 1742 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1743 %2 = bitcast i8* %y to <16 x i8>* 1744 store <16 x i8> %1, <16 x i8>* %2, align 1 1745 ret i8* %z 1746} 1747 1748define i8* @strb8_m128(i8* %y, i8* %x) { 1749; CHECK-LABEL: strb8_m128: 1750; CHECK: @ %bb.0: @ %entry 1751; CHECK-NEXT: vldrb.u8 q0, [r1] 1752; CHECK-NEXT: vstrb.8 q0, [r0] 1753; CHECK-NEXT: subs r0, #128 1754; CHECK-NEXT: bx lr 1755entry: 1756 %z = getelementptr inbounds i8, i8* %y, i32 -128 1757 %0 = bitcast i8* %x to <16 x i8>* 1758 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1759 %2 = bitcast i8* %y to <16 x i8>* 1760 store <16 x i8> %1, <16 x i8>* %2, align 1 1761 ret i8* %z 1762} 1763 1764 1765define i8* @strf32_4(i8* %y, i8* %x) { 1766; CHECK-LE-LABEL: strf32_4: 1767; CHECK-LE: @ %bb.0: @ %entry 1768; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1769; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 1770; CHECK-LE-NEXT: bx lr 1771; 1772; CHECK-BE-LABEL: strf32_4: 1773; CHECK-BE: @ %bb.0: @ %entry 1774; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1775; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4 1776; CHECK-BE-NEXT: bx lr 1777entry: 1778 %z = getelementptr inbounds i8, i8* %y, i32 4 1779 %0 = bitcast i8* %x to <4 x float>* 1780 %1 = load <4 x float>, <4 x float>* %0, align 4 1781 %2 = bitcast i8* %y to <4 x float>* 1782 store <4 x float> %1, <4 x float>* %2, align 4 1783 ret i8* %z 1784} 1785 1786define i8* @strf16_4(i8* %y, i8* %x) { 1787; CHECK-LE-LABEL: strf16_4: 1788; CHECK-LE: @ %bb.0: @ %entry 1789; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1790; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 1791; CHECK-LE-NEXT: bx lr 1792; 1793; CHECK-BE-LABEL: strf16_4: 1794; CHECK-BE: @ %bb.0: @ %entry 1795; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1796; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 1797; CHECK-BE-NEXT: bx lr 1798entry: 1799 %z = getelementptr inbounds i8, i8* %y, i32 4 1800 %0 = bitcast i8* %x to <8 x half>* 1801 %1 
= load <8 x half>, <8 x half>* %0, align 2 1802 %2 = bitcast i8* %y to <8 x half>* 1803 store <8 x half> %1, <8 x half>* %2, align 2 1804 ret i8* %z 1805} 1806 1807define i8* @strwi32_align1(i8* %y, i8* %x) { 1808; CHECK-LE-LABEL: strwi32_align1: 1809; CHECK-LE: @ %bb.0: @ %entry 1810; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1811; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 1812; CHECK-LE-NEXT: bx lr 1813; 1814; CHECK-BE-LABEL: strwi32_align1: 1815; CHECK-BE: @ %bb.0: @ %entry 1816; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1817; CHECK-BE-NEXT: vrev32.8 q0, q0 1818; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3 1819; CHECK-BE-NEXT: bx lr 1820entry: 1821 %z = getelementptr inbounds i8, i8* %y, i32 3 1822 %0 = bitcast i8* %x to <4 x i32>* 1823 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1824 %2 = bitcast i8* %y to <4 x i32>* 1825 store <4 x i32> %1, <4 x i32>* %2, align 1 1826 ret i8* %z 1827} 1828 1829define i8* @strhi16_align1(i8* %y, i8* %x) { 1830; CHECK-LE-LABEL: strhi16_align1: 1831; CHECK-LE: @ %bb.0: @ %entry 1832; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1833; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 1834; CHECK-LE-NEXT: bx lr 1835; 1836; CHECK-BE-LABEL: strhi16_align1: 1837; CHECK-BE: @ %bb.0: @ %entry 1838; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1839; CHECK-BE-NEXT: vrev16.8 q0, q0 1840; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3 1841; CHECK-BE-NEXT: bx lr 1842entry: 1843 %z = getelementptr inbounds i8, i8* %y, i32 3 1844 %0 = bitcast i8* %x to <8 x i16>* 1845 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1846 %2 = bitcast i8* %y to <8 x i16>* 1847 store <8 x i16> %1, <8 x i16>* %2, align 1 1848 ret i8* %z 1849} 1850 1851define i8* @strhi32_align1(i8* %y, i8* %x) { 1852; CHECK-LABEL: strhi32_align1: 1853; CHECK: @ %bb.0: @ %entry 1854; CHECK-NEXT: .pad #8 1855; CHECK-NEXT: sub sp, #8 1856; CHECK-NEXT: vldrw.u32 q0, [r1] 1857; CHECK-NEXT: mov r1, sp 1858; CHECK-NEXT: vstrh.32 q0, [r1] 1859; CHECK-NEXT: ldrd r1, r2, [sp] 1860; CHECK-NEXT: str r1, [r0] 1861; CHECK-NEXT: str r2, [r0, #4] 1862; CHECK-NEXT: adds r0, #3 
1863; CHECK-NEXT: add sp, #8 1864; CHECK-NEXT: bx lr 1865entry: 1866 %z = getelementptr inbounds i8, i8* %y, i32 3 1867 %0 = bitcast i8* %x to <4 x i32>* 1868 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1869 %2 = bitcast i8* %y to <4 x i16>* 1870 %3 = trunc <4 x i32> %1 to <4 x i16> 1871 store <4 x i16> %3, <4 x i16>* %2, align 1 1872 ret i8* %z 1873} 1874 1875define i8* @strf32_align1(i8* %y, i8* %x) { 1876; CHECK-LE-LABEL: strf32_align1: 1877; CHECK-LE: @ %bb.0: @ %entry 1878; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1879; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 1880; CHECK-LE-NEXT: bx lr 1881; 1882; CHECK-BE-LABEL: strf32_align1: 1883; CHECK-BE: @ %bb.0: @ %entry 1884; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1885; CHECK-BE-NEXT: vrev32.8 q0, q0 1886; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3 1887; CHECK-BE-NEXT: bx lr 1888entry: 1889 %z = getelementptr inbounds i8, i8* %y, i32 3 1890 %0 = bitcast i8* %x to <4 x float>* 1891 %1 = load <4 x float>, <4 x float>* %0, align 4 1892 %2 = bitcast i8* %y to <4 x float>* 1893 store <4 x float> %1, <4 x float>* %2, align 1 1894 ret i8* %z 1895} 1896 1897define i8* @strf16_align1(i8* %y, i8* %x) { 1898; CHECK-LE-LABEL: strf16_align1: 1899; CHECK-LE: @ %bb.0: @ %entry 1900; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1901; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 1902; CHECK-LE-NEXT: bx lr 1903; 1904; CHECK-BE-LABEL: strf16_align1: 1905; CHECK-BE: @ %bb.0: @ %entry 1906; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1907; CHECK-BE-NEXT: vrev16.8 q0, q0 1908; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3 1909; CHECK-BE-NEXT: bx lr 1910entry: 1911 %z = getelementptr inbounds i8, i8* %y, i32 3 1912 %0 = bitcast i8* %x to <8 x half>* 1913 %1 = load <8 x half>, <8 x half>* %0, align 2 1914 %2 = bitcast i8* %y to <8 x half>* 1915 store <8 x half> %1, <8 x half>* %2, align 1 1916 ret i8* %z 1917} 1918 1919define i8* @strf16_align8(i8* %y, i8* %x) { 1920; CHECK-LE-LABEL: strf16_align8: 1921; CHECK-LE: @ %bb.0: @ %entry 1922; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1923; CHECK-LE-NEXT: vstrb.8 q0, 
[r0], #16 1924; CHECK-LE-NEXT: bx lr 1925; 1926; CHECK-BE-LABEL: strf16_align8: 1927; CHECK-BE: @ %bb.0: @ %entry 1928; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1929; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16 1930; CHECK-BE-NEXT: bx lr 1931entry: 1932 %z = getelementptr inbounds i8, i8* %y, i32 16 1933 %0 = bitcast i8* %x to <8 x i16>* 1934 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1935 %2 = bitcast i8* %y to <8 x i16>* 1936 store <8 x i16> %1, <8 x i16>* %2, align 8 1937 ret i8* %z 1938} 1939