1; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTRICTALIGN %s 2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+strict-align -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=STRICTALIGN %s 3 4; This file contains tests for the AArch64 load/store optimizer. 5 6%padding = type { i8*, i8*, i8*, i8* } 7%s.byte = type { i8, i8 } 8%s.halfword = type { i16, i16 } 9%s.word = type { i32, i32 } 10%s.doubleword = type { i64, i32 } 11%s.quadword = type { fp128, i32 } 12%s.float = type { float, i32 } 13%s.double = type { double, i32 } 14%struct.byte = type { %padding, %s.byte } 15%struct.halfword = type { %padding, %s.halfword } 16%struct.word = type { %padding, %s.word } 17%struct.doubleword = type { %padding, %s.doubleword } 18%struct.quadword = type { %padding, %s.quadword } 19%struct.float = type { %padding, %s.float } 20%struct.double = type { %padding, %s.double } 21 22; Check the following transform: 23; 24; (ldr|str) X, [x0, #32] 25; ... 26; add x0, x0, #32 27; -> 28; (ldr|str) X, [x0, #32]! 29; 30; with X being either w1, x1, s0, d0 or q0. 31 32declare void @bar_byte(%s.byte*, i8) 33 34define void @load-pre-indexed-byte(%struct.byte* %ptr) nounwind { 35; CHECK-LABEL: load-pre-indexed-byte 36; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, #32]! 37entry: 38 %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 39 %add = load i8, i8* %a, align 4 40 br label %bar 41bar: 42 %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 43 tail call void @bar_byte(%s.byte* %c, i8 %add) 44 ret void 45} 46 47define void @store-pre-indexed-byte(%struct.byte* %ptr, i8 %val) nounwind { 48; CHECK-LABEL: store-pre-indexed-byte 49; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, #32]! 50entry: 51 %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 52 store i8 %val, i8* %a, align 4 53 br label %bar 54bar: 55 %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 56 tail call void @bar_byte(%s.byte* %c, i8 %val) 57 ret void 58} 59 60declare void @bar_halfword(%s.halfword*, i16) 61 62define void @load-pre-indexed-halfword(%struct.halfword* %ptr) nounwind { 63; CHECK-LABEL: load-pre-indexed-halfword 64; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, #32]! 65entry: 66 %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 67 %add = load i16, i16* %a, align 4 68 br label %bar 69bar: 70 %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 71 tail call void @bar_halfword(%s.halfword* %c, i16 %add) 72 ret void 73} 74 75define void @store-pre-indexed-halfword(%struct.halfword* %ptr, i16 %val) nounwind { 76; CHECK-LABEL: store-pre-indexed-halfword 77; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}, #32]! 78entry: 79 %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 80 store i16 %val, i16* %a, align 4 81 br label %bar 82bar: 83 %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 84 tail call void @bar_halfword(%s.halfword* %c, i16 %val) 85 ret void 86} 87 88declare void @bar_word(%s.word*, i32) 89 90define void @load-pre-indexed-word(%struct.word* %ptr) nounwind { 91; CHECK-LABEL: load-pre-indexed-word 92; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]! 93entry: 94 %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 95 %add = load i32, i32* %a, align 4 96 br label %bar 97bar: 98 %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 99 tail call void @bar_word(%s.word* %c, i32 %add) 100 ret void 101} 102 103define void @store-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind { 104; CHECK-LABEL: store-pre-indexed-word 105; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #32]! 106entry: 107 %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 108 store i32 %val, i32* %a, align 4 109 br label %bar 110bar: 111 %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 112 tail call void @bar_word(%s.word* %c, i32 %val) 113 ret void 114} 115 116declare void @bar_doubleword(%s.doubleword*, i64) 117 118define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind { 119; CHECK-LABEL: load-pre-indexed-doubleword 120; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]! 121entry: 122 %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0 123 %add = load i64, i64* %a, align 8 124 br label %bar 125bar: 126 %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1 127 tail call void @bar_doubleword(%s.doubleword* %c, i64 %add) 128 ret void 129} 130 131define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) nounwind { 132; CHECK-LABEL: store-pre-indexed-doubleword 133; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]! 134entry: 135 %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0 136 store i64 %val, i64* %a, align 8 137 br label %bar 138bar: 139 %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1 140 tail call void @bar_doubleword(%s.doubleword* %c, i64 %val) 141 ret void 142} 143 144declare void @bar_quadword(%s.quadword*, fp128) 145 146define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind { 147; CHECK-LABEL: load-pre-indexed-quadword 148; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 149entry: 150 %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0 151 %add = load fp128, fp128* %a, align 16 152 br label %bar 153bar: 154 %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1 155 tail call void @bar_quadword(%s.quadword* %c, fp128 %add) 156 ret void 157} 158 159define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) nounwind { 160; CHECK-LABEL: store-pre-indexed-quadword 161; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 162entry: 163 %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0 164 store fp128 %val, fp128* %a, align 16 165 br label %bar 166bar: 167 %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1 168 tail call void @bar_quadword(%s.quadword* %c, fp128 %val) 169 ret void 170} 171 172declare void @bar_float(%s.float*, float) 173 174define void @load-pre-indexed-float(%struct.float* %ptr) nounwind { 175; CHECK-LABEL: load-pre-indexed-float 176; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]! 177entry: 178 %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0 179 %add = load float, float* %a, align 4 180 br label %bar 181bar: 182 %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1 183 tail call void @bar_float(%s.float* %c, float %add) 184 ret void 185} 186 187define void @store-pre-indexed-float(%struct.float* %ptr, float %val) nounwind { 188; CHECK-LABEL: store-pre-indexed-float 189; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #32]! 190entry: 191 %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0 192 store float %val, float* %a, align 4 193 br label %bar 194bar: 195 %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1 196 tail call void @bar_float(%s.float* %c, float %val) 197 ret void 198} 199 200declare void @bar_double(%s.double*, double) 201 202define void @load-pre-indexed-double(%struct.double* %ptr) nounwind { 203; CHECK-LABEL: load-pre-indexed-double 204; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]! 205entry: 206 %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0 207 %add = load double, double* %a, align 8 208 br label %bar 209bar: 210 %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1 211 tail call void @bar_double(%s.double* %c, double %add) 212 ret void 213} 214 215define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwind { 216; CHECK-LABEL: store-pre-indexed-double 217; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]! 218entry: 219 %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0 220 store double %val, double* %a, align 8 221 br label %bar 222bar: 223 %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1 224 tail call void @bar_double(%s.double* %c, double %val) 225 ret void 226} 227 228; Check the following transform: 229; 230; (ldp|stp) w1, w2 [x0, #32] 231; ... 232; add x0, x0, #32 233; -> 234; (ldp|stp) w1, w2, [x0, #32]! 235; 236 237define void @load-pair-pre-indexed-word(%struct.word* %ptr) nounwind { 238; CHECK-LABEL: load-pair-pre-indexed-word 239; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]! 240; CHECK-NOT: add x0, x0, #32 241entry: 242 %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 243 %a1 = load i32, i32* %a, align 4 244 %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 245 %b1 = load i32, i32* %b, align 4 246 %add = add i32 %a1, %b1 247 br label %bar 248bar: 249 %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 250 tail call void @bar_word(%s.word* %c, i32 %add) 251 ret void 252} 253 254define void @store-pair-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind { 255; CHECK-LABEL: store-pair-pre-indexed-word 256; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]! 257; CHECK-NOT: add x0, x0, #32 258entry: 259 %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 260 store i32 %val, i32* %a, align 4 261 %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 262 store i32 %val, i32* %b, align 4 263 br label %bar 264bar: 265 %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 266 tail call void @bar_word(%s.word* %c, i32 %val) 267 ret void 268} 269 270; Check the following transform: 271; 272; add x8, x8, #16 273; ... 274; ldr X, [x8] 275; -> 276; ldr X, [x8, #16]! 277; 278; with X being either w0, x0, s0, d0 or q0. 279 280%pre.struct.i32 = type { i32, i32, i32, i32, i32} 281%pre.struct.i64 = type { i32, i64, i64, i64, i64} 282%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>} 283%pre.struct.float = type { i32, float, float, float} 284%pre.struct.double = type { i32, double, double, double} 285 286define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, 287 %pre.struct.i32* %load2) nounwind { 288; CHECK-LABEL: load-pre-indexed-word2 289; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]! 290 br i1 %cond, label %if.then, label %if.end 291if.then: 292 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 293 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1 294 br label %return 295if.end: 296 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2 297 br label %return 298return: 299 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 300 %ret = load i32, i32* %retptr 301 ret i32 %ret 302} 303 304define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond, 305 %pre.struct.i64* %load2) nounwind { 306; CHECK-LABEL: load-pre-indexed-doubleword2 307; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]! 308 br i1 %cond, label %if.then, label %if.end 309if.then: 310 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 311 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1 312 br label %return 313if.end: 314 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2 315 br label %return 316return: 317 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 318 %ret = load i64, i64* %retptr 319 ret i64 %ret 320} 321 322define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond, 323 %pre.struct.i128* %load2) nounwind { 324; CHECK-LABEL: load-pre-indexed-quadword2 325; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]! 326 br i1 %cond, label %if.then, label %if.end 327if.then: 328 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 329 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1 330 br label %return 331if.end: 332 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2 333 br label %return 334return: 335 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 336 %ret = load <2 x i64>, <2 x i64>* %retptr 337 ret <2 x i64> %ret 338} 339 340define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond, 341 %pre.struct.float* %load2) nounwind { 342; CHECK-LABEL: load-pre-indexed-float2 343; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]! 344 br i1 %cond, label %if.then, label %if.end 345if.then: 346 %load1 = load %pre.struct.float*, %pre.struct.float** %this 347 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1 348 br label %return 349if.end: 350 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2 351 br label %return 352return: 353 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 354 %ret = load float, float* %retptr 355 ret float %ret 356} 357 358define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond, 359 %pre.struct.double* %load2) nounwind { 360; CHECK-LABEL: load-pre-indexed-double2 361; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]! 362 br i1 %cond, label %if.then, label %if.end 363if.then: 364 %load1 = load %pre.struct.double*, %pre.struct.double** %this 365 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1 366 br label %return 367if.end: 368 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2 369 br label %return 370return: 371 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 372 %ret = load double, double* %retptr 373 ret double %ret 374} 375 376define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, 377 %pre.struct.i32* %load2) nounwind { 378; CHECK-LABEL: load-pre-indexed-word3 379; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]! 380 br i1 %cond, label %if.then, label %if.end 381if.then: 382 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 383 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 384 br label %return 385if.end: 386 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 387 br label %return 388return: 389 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 390 %ret = load i32, i32* %retptr 391 ret i32 %ret 392} 393 394define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, 395 %pre.struct.i64* %load2) nounwind { 396; CHECK-LABEL: load-pre-indexed-doubleword3 397; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]! 398 br i1 %cond, label %if.then, label %if.end 399if.then: 400 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 401 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2 402 br label %return 403if.end: 404 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3 405 br label %return 406return: 407 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 408 %ret = load i64, i64* %retptr 409 ret i64 %ret 410} 411 412define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, 413 %pre.struct.i128* %load2) nounwind { 414; CHECK-LABEL: load-pre-indexed-quadword3 415; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 416 br i1 %cond, label %if.then, label %if.end 417if.then: 418 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 419 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 420 br label %return 421if.end: 422 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 423 br label %return 424return: 425 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 426 %ret = load <2 x i64>, <2 x i64>* %retptr 427 ret <2 x i64> %ret 428} 429 430define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, 431 %pre.struct.float* %load2) nounwind { 432; CHECK-LABEL: load-pre-indexed-float3 433; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]! 434 br i1 %cond, label %if.then, label %if.end 435if.then: 436 %load1 = load %pre.struct.float*, %pre.struct.float** %this 437 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 438 br label %return 439if.end: 440 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 441 br label %return 442return: 443 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 444 %ret = load float, float* %retptr 445 ret float %ret 446} 447 448define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, 449 %pre.struct.double* %load2) nounwind { 450; CHECK-LABEL: load-pre-indexed-double3 451; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]! 452 br i1 %cond, label %if.then, label %if.end 453if.then: 454 %load1 = load %pre.struct.double*, %pre.struct.double** %this 455 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 456 br label %return 457if.end: 458 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 459 br label %return 460return: 461 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 462 %ret = load double, double* %retptr 463 ret double %ret 464} 465 466; Check the following transform: 467; 468; add x8, x8, #16 469; ... 470; str X, [x8] 471; -> 472; str X, [x8, #16]! 473; 474; with X being either w0, x0, s0, d0 or q0. 475 476define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, 477 %pre.struct.i32* %load2, 478 i32 %val) nounwind { 479; CHECK-LABEL: store-pre-indexed-word2 480; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]! 481 br i1 %cond, label %if.then, label %if.end 482if.then: 483 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 484 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1 485 br label %return 486if.end: 487 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2 488 br label %return 489return: 490 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 491 store i32 %val, i32* %retptr 492 ret void 493} 494 495define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond, 496 %pre.struct.i64* %load2, 497 i64 %val) nounwind { 498; CHECK-LABEL: store-pre-indexed-doubleword2 499; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]! 500 br i1 %cond, label %if.then, label %if.end 501if.then: 502 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 503 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1 504 br label %return 505if.end: 506 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2 507 br label %return 508return: 509 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 510 store i64 %val, i64* %retptr 511 ret void 512} 513 514define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond, 515 %pre.struct.i128* %load2, 516 <2 x i64> %val) nounwind { 517; CHECK-LABEL: store-pre-indexed-quadword2 518; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]! 519 br i1 %cond, label %if.then, label %if.end 520if.then: 521 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 522 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1 523 br label %return 524if.end: 525 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2 526 br label %return 527return: 528 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 529 store <2 x i64> %val, <2 x i64>* %retptr 530 ret void 531} 532 533define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond, 534 %pre.struct.float* %load2, 535 float %val) nounwind { 536; CHECK-LABEL: store-pre-indexed-float2 537; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]! 538 br i1 %cond, label %if.then, label %if.end 539if.then: 540 %load1 = load %pre.struct.float*, %pre.struct.float** %this 541 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1 542 br label %return 543if.end: 544 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2 545 br label %return 546return: 547 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 548 store float %val, float* %retptr 549 ret void 550} 551 552define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond, 553 %pre.struct.double* %load2, 554 double %val) nounwind { 555; CHECK-LABEL: store-pre-indexed-double2 556; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]! 557 br i1 %cond, label %if.then, label %if.end 558if.then: 559 %load1 = load %pre.struct.double*, %pre.struct.double** %this 560 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1 561 br label %return 562if.end: 563 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2 564 br label %return 565return: 566 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 567 store double %val, double* %retptr 568 ret void 569} 570 571define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, 572 %pre.struct.i32* %load2, 573 i32 %val) nounwind { 574; CHECK-LABEL: store-pre-indexed-word3 575; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]! 576 br i1 %cond, label %if.then, label %if.end 577if.then: 578 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 579 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 580 br label %return 581if.end: 582 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 583 br label %return 584return: 585 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 586 store i32 %val, i32* %retptr 587 ret void 588} 589 590define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, 591 %pre.struct.i64* %load2, 592 i64 %val) nounwind { 593; CHECK-LABEL: store-pre-indexed-doubleword3 594; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]! 595 br i1 %cond, label %if.then, label %if.end 596if.then: 597 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 598 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3 599 br label %return 600if.end: 601 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4 602 br label %return 603return: 604 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 605 store i64 %val, i64* %retptr 606 ret void 607} 608 609define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, 610 %pre.struct.i128* %load2, 611 <2 x i64> %val) nounwind { 612; CHECK-LABEL: store-pre-indexed-quadword3 613; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 614 br i1 %cond, label %if.then, label %if.end 615if.then: 616 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 617 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 618 br label %return 619if.end: 620 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 621 br label %return 622return: 623 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 624 store <2 x i64> %val, <2 x i64>* %retptr 625 ret void 626} 627 628define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, 629 %pre.struct.float* %load2, 630 float %val) nounwind { 631; CHECK-LABEL: store-pre-indexed-float3 632; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]! 633 br i1 %cond, label %if.then, label %if.end 634if.then: 635 %load1 = load %pre.struct.float*, %pre.struct.float** %this 636 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 637 br label %return 638if.end: 639 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 640 br label %return 641return: 642 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 643 store float %val, float* %retptr 644 ret void 645} 646 647define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, 648 %pre.struct.double* %load2, 649 double %val) nounwind { 650; CHECK-LABEL: store-pre-indexed-double3 651; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]! 652 br i1 %cond, label %if.then, label %if.end 653if.then: 654 %load1 = load %pre.struct.double*, %pre.struct.double** %this 655 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 656 br label %return 657if.end: 658 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 659 br label %return 660return: 661 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 662 store double %val, double* %retptr 663 ret void 664} 665 666; Check the following transform: 667; 668; ldr X, [x20] 669; ... 670; add x20, x20, #32 671; -> 672; ldr X, [x20], #32 673; 674; with X being either w0, x0, s0, d0 or q0. 675 676define void @load-post-indexed-byte(i8* %array, i64 %count) nounwind { 677; CHECK-LABEL: load-post-indexed-byte 678; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}], #4 679entry: 680 %gep1 = getelementptr i8, i8* %array, i64 2 681 br label %body 682 683body: 684 %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ] 685 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 686 %gep2 = getelementptr i8, i8* %iv2, i64 -1 687 %load = load i8, i8* %gep2 688 call void @use-byte(i8 %load) 689 %load2 = load i8, i8* %iv2 690 call void @use-byte(i8 %load2) 691 %iv.next = add i64 %iv, -4 692 %gep3 = getelementptr i8, i8* %iv2, i64 4 693 %cond = icmp eq i64 %iv.next, 0 694 br i1 %cond, label %exit, label %body 695 696exit: 697 ret void 698} 699 700define void @load-post-indexed-halfword(i16* %array, i64 %count) nounwind { 701; CHECK-LABEL: load-post-indexed-halfword 702; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}], #8 703entry: 704 %gep1 = getelementptr i16, i16* %array, i64 2 705 br label %body 706 707body: 708 %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ] 709 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 710 %gep2 = getelementptr i16, i16* %iv2, i64 -1 711 %load = load i16, i16* %gep2 712 call void @use-halfword(i16 %load) 713 %load2 = load i16, i16* %iv2 714 call void @use-halfword(i16 %load2) 715 %iv.next = add i64 %iv, -4 716 %gep3 = getelementptr i16, i16* %iv2, i64 4 717 %cond = icmp eq i64 %iv.next, 0 718 br i1 %cond, label %exit, label %body 719 720exit: 721 ret void 722} 723 724define void @load-post-indexed-word(i32* %array, i64 %count) nounwind { 725; CHECK-LABEL: load-post-indexed-word 726; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16 727entry: 728 %gep1 = getelementptr i32, i32* %array, i64 2 729 br label %body 730 731body: 732 %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ] 733 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 734 %gep2 = getelementptr i32, i32* %iv2, i64 -1 735 %load = load i32, i32* %gep2 736 call void @use-word(i32 %load) 737 %load2 = load i32, i32* %iv2 738 call void @use-word(i32 %load2) 739 %iv.next = add i64 %iv, -4 740 %gep3 = getelementptr i32, i32* %iv2, i64 4 741 %cond = icmp eq i64 %iv.next, 0 742 br i1 %cond, label %exit, label %body 743 744exit: 745 ret void 746} 747 748define void @load-post-indexed-doubleword(i64* %array, i64 %count) nounwind { 749; CHECK-LABEL: load-post-indexed-doubleword 750; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #32 751entry: 752 %gep1 = getelementptr i64, i64* %array, i64 2 753 br label %body 754 755body: 756 %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ] 757 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 758 %gep2 = getelementptr i64, i64* %iv2, i64 -1 759 %load = load i64, i64* %gep2 760 call void @use-doubleword(i64 %load) 761 %load2 = load i64, i64* %iv2 762 call void @use-doubleword(i64 %load2) 763 %iv.next = add i64 %iv, -4 764 %gep3 = getelementptr i64, i64* %iv2, i64 4 765 %cond = icmp eq i64 %iv.next, 0 766 br i1 %cond, label %exit, label %body 767 768exit: 769 ret void 770} 771 772define void @load-post-indexed-quadword(<2 x i64>* %array, i64 %count) nounwind { 773; CHECK-LABEL: load-post-indexed-quadword 774; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #64 775entry: 776 %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2 777 br label %body 778 779body: 780 %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ] 781 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 782 %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1 783 %load = load <2 x i64>, <2 x i64>* %gep2 784 call void @use-quadword(<2 x i64> %load) 785 %load2 = load <2 x i64>, <2 x i64>* %iv2 786 call void @use-quadword(<2 x i64> %load2) 787 %iv.next = add i64 %iv, -4 788 %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4 789 %cond = icmp eq i64 %iv.next, 0 790 br i1 %cond, label %exit, label %body 791 792exit: 793 ret void 794} 795 796define void @load-post-indexed-float(float* %array, i64 %count) nounwind { 797; CHECK-LABEL: load-post-indexed-float 798; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #16 799entry: 800 %gep1 = getelementptr float, float* %array, i64 2 801 br label %body 802 803body: 804 %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ] 805 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 806 %gep2 = getelementptr float, float* %iv2, i64 -1 807 %load = load float, float* %gep2 808 call void @use-float(float %load) 809 %load2 = load float, float* %iv2 810 call void @use-float(float %load2) 811 %iv.next = add i64 %iv, -4 812 %gep3 = getelementptr float, float* %iv2, i64 4 813 %cond = icmp eq i64 %iv.next, 0 814 br i1 %cond, label %exit, label %body 815 816exit: 817 ret void 818} 819 820define void @load-post-indexed-double(double* %array, i64 %count) nounwind { 821; CHECK-LABEL: load-post-indexed-double 822; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #32 823entry: 824 %gep1 = getelementptr double, double* %array, i64 2 825 br label %body 826 827body: 828 %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ] 829 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 830 %gep2 = getelementptr double, double* %iv2, i64 -1 831 %load = load double, double* %gep2 832 call void @use-double(double %load) 833 %load2 = load double, double* %iv2 834 call void @use-double(double %load2) 835 %iv.next = add i64 %iv, -4 836 %gep3 = getelementptr double, double* %iv2, i64 4 837 %cond = icmp eq i64 %iv.next, 0 838 br i1 %cond, label %exit, label %body 839 840exit: 841 ret void 842} 843 844; Check the following transform: 845; 846; str X, [x20] 847; ... 848; add x20, x20, #32 849; -> 850; str X, [x20], #32 851; 852; with X being either w0, x0, s0, d0 or q0. 853 854define void @store-post-indexed-byte(i8* %array, i64 %count, i8 %val) nounwind { 855; CHECK-LABEL: store-post-indexed-byte 856; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #4 857entry: 858 %gep1 = getelementptr i8, i8* %array, i64 2 859 br label %body 860 861body: 862 %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ] 863 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 864 %gep2 = getelementptr i8, i8* %iv2, i64 -1 865 %load = load i8, i8* %gep2 866 call void @use-byte(i8 %load) 867 store i8 %val, i8* %iv2 868 %iv.next = add i64 %iv, -4 869 %gep3 = getelementptr i8, i8* %iv2, i64 4 870 %cond = icmp eq i64 %iv.next, 0 871 br i1 %cond, label %exit, label %body 872 873exit: 874 ret void 875} 876 877define void @store-post-indexed-halfword(i16* %array, i64 %count, i16 %val) nounwind { 878; CHECK-LABEL: store-post-indexed-halfword 879; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #8 880entry: 881 %gep1 = getelementptr i16, i16* %array, i64 2 882 br label %body 883 884body: 885 %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ] 886 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 887 %gep2 = getelementptr i16, i16* %iv2, i64 -1 888 %load = load i16, i16* %gep2 889 call void @use-halfword(i16 %load) 890 store i16 %val, i16* %iv2 891 %iv.next = add i64 %iv, -4 892 %gep3 = getelementptr i16, i16* %iv2, i64 4 893 %cond = icmp eq i64 %iv.next, 0 894 br i1 %cond, label %exit, label %body 895 896exit: 897 ret void 898} 899 900define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind { 901; CHECK-LABEL: store-post-indexed-word 902; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16 903entry: 904 %gep1 = getelementptr i32, i32* %array, i64 2 905 br label %body 906 907body: 908 %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ] 909 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 910 %gep2 = getelementptr i32, i32* %iv2, i64 -1 911 %load = load i32, i32* %gep2 912 call void @use-word(i32 %load) 913 store i32 %val, i32* %iv2 914 %iv.next = add i64 %iv, -4 915 %gep3 = getelementptr i32, i32* %iv2, i64 4 916 %cond = icmp eq i64 %iv.next, 0 917 br i1 %cond, label %exit, label %body 918 919exit: 920 ret void 921} 922 923define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind { 924; CHECK-LABEL: store-post-indexed-doubleword 925; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32 926entry: 927 %gep1 = getelementptr i64, i64* %array, i64 2 928 br label %body 929 930body: 931 %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ] 932 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 933 %gep2 = getelementptr i64, i64* %iv2, i64 -1 934 %load = load i64, i64* %gep2 935 call void @use-doubleword(i64 %load) 936 store i64 %val, i64* %iv2 937 %iv.next = add i64 %iv, -4 938 %gep3 = getelementptr i64, i64* %iv2, i64 4 939 %cond = icmp eq i64 %iv.next, 0 940 br i1 %cond, label %exit, label %body 941 942exit: 943 ret void 944} 945 946define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind { 947; CHECK-LABEL: store-post-indexed-quadword 948; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64 949entry: 950 %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2 951 br label %body 952 953body: 954 %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ] 955 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 956 %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1 957 %load = load <2 x i64>, <2 x i64>* %gep2 958 call void @use-quadword(<2 x i64> %load) 959 store <2 x i64> %val, <2 x i64>* %iv2 960 %iv.next = add i64 %iv, -4 961 %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4 962 %cond = icmp eq i64 %iv.next, 0 963 br i1 %cond, label %exit, label %body 964 965exit: 966 ret void 967} 968 969define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind { 970; CHECK-LABEL: store-post-indexed-float 971; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16 972entry: 973 %gep1 = getelementptr float, float* %array, i64 2 974 br label %body 975 976body: 977 %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ] 978 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 979 %gep2 = getelementptr float, float* %iv2, i64 -1 980 %load = load float, float* %gep2 981 call void @use-float(float %load) 982 store float %val, float* %iv2 983 %iv.next = add i64 %iv, -4 984 %gep3 = getelementptr float, float* %iv2, i64 4 985 %cond = icmp eq i64 %iv.next, 0 986 br i1 %cond, label %exit, label %body 987 988exit: 989 ret void 990} 991 992define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind { 993; CHECK-LABEL: store-post-indexed-double 994; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32 995entry: 996 %gep1 = getelementptr double, double* %array, i64 2 997 br label %body 998 999body: 1000 %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ] 1001 %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] 1002 %gep2 = getelementptr double, double* %iv2, i64 -1 1003 %load = load double, double* %gep2 1004 call void @use-double(double %load) 1005 store double %val, double* %iv2 1006 %iv.next = add i64 %iv, -4 1007 %gep3 = getelementptr double, double* %iv2, i64 4 1008 %cond = icmp eq i64 %iv.next, 0 1009 br i1 %cond, label %exit, label %body 1010 1011exit: 1012 ret void 1013} 1014 1015declare void @use-byte(i8) 1016declare void @use-halfword(i16) 1017declare void @use-word(i32) 1018declare void @use-doubleword(i64) 1019declare void @use-quadword(<2 x i64>) 1020declare void @use-float(float) 1021declare void @use-double(double) 1022 1023; Check the following transform: 1024; 1025; stp w0, [x20] 1026; ... 1027; add x20, x20, #32 1028; -> 1029; stp w0, [x20], #32 1030 1031define void @store-pair-post-indexed-word() nounwind { 1032; CHECK-LABEL: store-pair-post-indexed-word 1033; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [sp], #16 1034; CHECK: ret 1035 %src = alloca { i32, i32 }, align 8 1036 %dst = alloca { i32, i32 }, align 8 1037 1038 %src.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 0 1039 %src.real = load i32, i32* %src.realp 1040 %src.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 1 1041 %src.imag = load i32, i32* %src.imagp 1042 1043 %dst.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 0 1044 %dst.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 1 1045 store i32 %src.real, i32* %dst.realp 1046 store i32 %src.imag, i32* %dst.imagp 1047 ret void 1048} 1049 1050define void @store-pair-post-indexed-doubleword() nounwind { 1051; CHECK-LABEL: store-pair-post-indexed-doubleword 1052; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [sp], #32 1053; CHECK: ret 1054 %src = alloca { i64, i64 }, align 8 1055 %dst = alloca { i64, i64 }, align 8 1056 1057 %src.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 0 1058 %src.real = load i64, i64* %src.realp 1059 %src.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 1 1060 %src.imag = load i64, i64* %src.imagp 1061 1062 %dst.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 0 1063 %dst.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 1 1064 store i64 %src.real, i64* %dst.realp 1065 store i64 %src.imag, i64* %dst.imagp 1066 ret void 1067} 1068 1069define void @store-pair-post-indexed-float() nounwind { 1070; CHECK-LABEL: store-pair-post-indexed-float 1071; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [sp], #16 1072; CHECK: ret 1073 %src = alloca { float, float }, align 8 1074 %dst = alloca { float, float }, align 8 1075 1076 %src.realp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 0 1077 %src.real = load float, float* %src.realp 1078 %src.imagp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 1 1079 %src.imag = load float, float* %src.imagp 1080 1081 %dst.realp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 0 1082 %dst.imagp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 1 1083 store float %src.real, float* %dst.realp 1084 store float %src.imag, float* %dst.imagp 1085 ret void 1086} 1087 1088define void @store-pair-post-indexed-double() nounwind { 1089; CHECK-LABEL: store-pair-post-indexed-double 1090; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [sp], #32 1091; CHECK: ret 1092 %src = alloca { double, double }, align 8 1093 %dst = alloca { double, double }, align 8 1094 1095 %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0 1096 %src.real = load double, double* %src.realp 1097 %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1 1098 %src.imag = load double, double* %src.imagp 1099 1100 %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0 1101 %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1 1102 store double %src.real, double* %dst.realp 1103 store double %src.imag, double* %dst.imagp 1104 ret void 1105} 1106 1107; Check the following transform: 1108; 1109; (ldr|str) X, [x20] 1110; ... 1111; sub x20, x20, #16 1112; -> 1113; (ldr|str) X, [x20], #-16 1114; 1115; with X being either w0, x0, s0, d0 or q0. 1116 1117define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind { 1118; CHECK-LABEL: post-indexed-sub-word 1119; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8 1120; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8 1121 br label %for.body 1122for.body: 1123 %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ] 1124 %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ] 1125 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1126 %gep1 = getelementptr i32, i32* %phi1, i64 -1 1127 %load1 = load i32, i32* %gep1 1128 %gep2 = getelementptr i32, i32* %phi2, i64 -1 1129 store i32 %load1, i32* %gep2 1130 %load2 = load i32, i32* %phi1 1131 store i32 %load2, i32* %phi2 1132 %dec.i = add nsw i64 %i, -1 1133 %gep3 = getelementptr i32, i32* %phi2, i64 -2 1134 %gep4 = getelementptr i32, i32* %phi1, i64 -2 1135 %cond = icmp sgt i64 %dec.i, 0 1136 br i1 %cond, label %for.body, label %end 1137end: 1138 ret void 1139} 1140 1141define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind { 1142; CHECK-LABEL: post-indexed-sub-doubleword 1143; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16 1144; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16 1145 br label %for.body 1146for.body: 1147 %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] 1148 %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ] 1149 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1150 %gep1 = getelementptr i64, i64* %phi1, i64 -1 1151 %load1 = load i64, i64* %gep1 1152 %gep2 = getelementptr i64, i64* %phi2, i64 -1 1153 store i64 %load1, i64* %gep2 1154 %load2 = load i64, i64* %phi1 1155 store i64 %load2, i64* %phi2 1156 %dec.i = add nsw i64 %i, -1 1157 %gep3 = getelementptr i64, i64* %phi2, i64 -2 1158 %gep4 = getelementptr i64, i64* %phi1, i64 -2 1159 %cond = icmp sgt i64 %dec.i, 0 1160 br i1 %cond, label %for.body, label %end 1161end: 1162 ret void 1163} 1164 1165define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind { 1166; CHECK-LABEL: post-indexed-sub-quadword 1167; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32 1168; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32 1169 br label %for.body 1170for.body: 1171 %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ] 1172 %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ] 1173 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1174 %gep1 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -1 1175 %load1 = load <2 x i64>, <2 x i64>* %gep1 1176 %gep2 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -1 1177 store <2 x i64> %load1, <2 x i64>* %gep2 1178 %load2 = load <2 x i64>, <2 x i64>* %phi1 1179 store <2 x i64> %load2, <2 x i64>* %phi2 1180 %dec.i = add nsw i64 %i, -1 1181 %gep3 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -2 1182 %gep4 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -2 1183 %cond = icmp sgt i64 %dec.i, 0 1184 br i1 %cond, label %for.body, label %end 1185end: 1186 ret void 1187} 1188 1189define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind { 1190; CHECK-LABEL: post-indexed-sub-float 1191; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8 1192; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8 1193 br label %for.body 1194for.body: 1195 %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ] 1196 %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ] 1197 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1198 %gep1 = getelementptr float, float* %phi1, i64 -1 1199 %load1 = load float, float* %gep1 1200 %gep2 = getelementptr float, float* %phi2, i64 -1 1201 store float %load1, float* %gep2 1202 %load2 = load float, float* %phi1 1203 store float %load2, float* %phi2 1204 %dec.i = add nsw i64 %i, -1 1205 %gep3 = getelementptr float, float* %phi2, i64 -2 1206 %gep4 = getelementptr float, float* %phi1, i64 -2 1207 %cond = icmp sgt i64 %dec.i, 0 1208 br i1 %cond, label %for.body, label %end 1209end: 1210 ret void 1211} 1212 1213define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind { 1214; CHECK-LABEL: post-indexed-sub-double 1215; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16 1216; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16 1217 br label %for.body 1218for.body: 1219 %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ] 1220 %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ] 1221 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1222 %gep1 = getelementptr double, double* %phi1, i64 -1 1223 %load1 = load double, double* %gep1 1224 %gep2 = getelementptr double, double* %phi2, i64 -1 1225 store double %load1, double* %gep2 1226 %load2 = load double, double* %phi1 1227 store double %load2, double* %phi2 1228 %dec.i = add nsw i64 %i, -1 1229 %gep3 = getelementptr double, double* %phi2, i64 -2 1230 %gep4 = getelementptr double, double* %phi1, i64 -2 1231 %cond = icmp sgt i64 %dec.i, 0 1232 br i1 %cond, label %for.body, label %end 1233end: 1234 ret void 1235} 1236 1237define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind { 1238; CHECK-LABEL: post-indexed-sub-doubleword-offset-min 1239; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256 1240; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256 1241 br label %for.body 1242for.body: 1243 %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] 1244 %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ] 1245 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1246 %gep1 = getelementptr i64, i64* %phi1, i64 1 1247 %load1 = load i64, i64* %gep1 1248 %gep2 = getelementptr i64, i64* %phi2, i64 1 1249 store i64 %load1, i64* %gep2 1250 %load2 = load i64, i64* %phi1 1251 store i64 %load2, i64* %phi2 1252 %dec.i = add nsw i64 %i, -1 1253 %gep3 = getelementptr i64, i64* %phi2, i64 -32 1254 %gep4 = getelementptr i64, i64* %phi1, i64 -32 1255 %cond = icmp sgt i64 %dec.i, 0 1256 br i1 %cond, label %for.body, label %end 1257end: 1258 ret void 1259} 1260 1261define void @post-indexed-doubleword-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind { 1262; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range 1263; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}] 1264; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256 1265; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}] 1266; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256 1267 1268 br label %for.body 1269for.body: 1270 %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] 1271 %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ] 1272 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1273 %gep1 = getelementptr i64, i64* %phi1, i64 1 1274 %load1 = load i64, i64* %gep1 1275 %gep2 = getelementptr i64, i64* %phi2, i64 1 1276 store i64 %load1, i64* %gep2 1277 %load2 = load i64, i64* %phi1 1278 store i64 %load2, i64* %phi2 1279 %dec.i = add nsw i64 %i, -1 1280 %gep3 = getelementptr i64, i64* %phi2, i64 32 1281 %gep4 = getelementptr i64, i64* %phi1, i64 32 1282 %cond = icmp sgt i64 %dec.i, 0 1283 br i1 %cond, label %for.body, label %end 1284end: 1285 ret void 1286} 1287 1288define void @post-indexed-paired-min-offset(i64* %a, i64* %b, i64 %count) nounwind { 1289; CHECK-LABEL: post-indexed-paired-min-offset 1290; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512 1291; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512 1292 br label %for.body 1293for.body: 1294 %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] 1295 %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ] 1296 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1297 %gep1 = getelementptr i64, i64* %phi1, i64 1 1298 %load1 = load i64, i64* %gep1 1299 %gep2 = getelementptr i64, i64* %phi2, i64 1 1300 %load2 = load i64, i64* %phi1 1301 store i64 %load1, i64* %gep2 1302 store i64 %load2, i64* %phi2 1303 %dec.i = add nsw i64 %i, -1 1304 %gep3 = getelementptr i64, i64* %phi2, i64 -64 1305 %gep4 = getelementptr i64, i64* %phi1, i64 -64 1306 %cond = icmp sgt i64 %dec.i, 0 1307 br i1 %cond, label %for.body, label %end 1308end: 1309 ret void 1310} 1311 1312define void @post-indexed-paired-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind { 1313; CHECK-LABEL: post-indexed-paired-offset-out-of-range 1314; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}] 1315; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512 1316; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}] 1317; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512 1318 br label %for.body 1319for.body: 1320 %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] 1321 %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ] 1322 %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] 1323 %gep1 = getelementptr i64, i64* %phi1, i64 1 1324 %load1 = load i64, i64* %phi1 1325 %gep2 = getelementptr i64, i64* %phi2, i64 1 1326 %load2 = load i64, i64* %gep1 1327 store i64 %load1, i64* %gep2 1328 store i64 %load2, i64* %phi2 1329 %dec.i = add nsw i64 %i, -1 1330 %gep3 = getelementptr i64, i64* %phi2, i64 64 1331 %gep4 = getelementptr i64, i64* %phi1, i64 64 1332 %cond = icmp sgt i64 %dec.i, 0 1333 br i1 %cond, label %for.body, label %end 1334end: 1335 ret void 1336} 1337 1338; DAGCombiner::MergeConsecutiveStores merges this into a vector store, 1339; replaceZeroVectorStore should split the vector store back into 1340; scalar stores which should get merged by AArch64LoadStoreOptimizer. 1341define void @merge_zr32(i32* %p) { 1342; CHECK-LABEL: merge_zr32: 1343; CHECK: // %entry 1344; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] 1345; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] 1346; CHECK-NEXT: ret 1347entry: 1348 store i32 0, i32* %p 1349 %p1 = getelementptr i32, i32* %p, i32 1 1350 store i32 0, i32* %p1 1351 ret void 1352} 1353 1354; Same as merge_zr32 but the merged stores should also get paried. 1355define void @merge_zr32_2(i32* %p) { 1356; CHECK-LABEL: merge_zr32_2: 1357; CHECK: // %entry 1358; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1359; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] 1360; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] 1361; CHECK-NEXT: ret 1362entry: 1363 store i32 0, i32* %p 1364 %p1 = getelementptr i32, i32* %p, i32 1 1365 store i32 0, i32* %p1 1366 %p2 = getelementptr i32, i32* %p, i64 2 1367 store i32 0, i32* %p2 1368 %p3 = getelementptr i32, i32* %p, i64 3 1369 store i32 0, i32* %p3 1370 ret void 1371} 1372 1373; Like merge_zr32_2, but checking the largest allowed stp immediate offset. 1374define void @merge_zr32_2_offset(i32* %p) { 1375; CHECK-LABEL: merge_zr32_2_offset: 1376; CHECK: // %entry 1377; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504] 1378; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #504] 1379; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #508] 1380; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #512] 1381; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #516] 1382; CHECK-NEXT: ret 1383entry: 1384 %p0 = getelementptr i32, i32* %p, i32 126 1385 store i32 0, i32* %p0 1386 %p1 = getelementptr i32, i32* %p, i32 127 1387 store i32 0, i32* %p1 1388 %p2 = getelementptr i32, i32* %p, i64 128 1389 store i32 0, i32* %p2 1390 %p3 = getelementptr i32, i32* %p, i64 129 1391 store i32 0, i32* %p3 1392 ret void 1393} 1394 1395; Like merge_zr32, but replaceZeroVectorStore should not split this 1396; vector store since the address offset is too large for the stp 1397; instruction. 1398define void @no_merge_zr32_2_offset(i32* %p) { 1399; CHECK-LABEL: no_merge_zr32_2_offset: 1400; CHECK: // %entry 1401; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 1402; NOSTRICTALIGN-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096] 1403; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4096] 1404; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4100] 1405; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4104] 1406; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4108] 1407; CHECK-NEXT: ret 1408entry: 1409 %p0 = getelementptr i32, i32* %p, i32 1024 1410 store i32 0, i32* %p0 1411 %p1 = getelementptr i32, i32* %p, i32 1025 1412 store i32 0, i32* %p1 1413 %p2 = getelementptr i32, i32* %p, i64 1026 1414 store i32 0, i32* %p2 1415 %p3 = getelementptr i32, i32* %p, i64 1027 1416 store i32 0, i32* %p3 1417 ret void 1418} 1419 1420; Like merge_zr32, but replaceZeroVectorStore should not split the 1421; vector store since the zero constant vector has multiple uses, so we 1422; err on the side that allows for stp q instruction generation. 1423define void @merge_zr32_3(i32* %p) { 1424; CHECK-LABEL: merge_zr32_3: 1425; CHECK: // %entry 1426; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 1427; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] 1428; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] 1429; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] 1430; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #16] 1431; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #24] 1432; CHECK-NEXT: ret 1433entry: 1434 store i32 0, i32* %p 1435 %p1 = getelementptr i32, i32* %p, i32 1 1436 store i32 0, i32* %p1 1437 %p2 = getelementptr i32, i32* %p, i64 2 1438 store i32 0, i32* %p2 1439 %p3 = getelementptr i32, i32* %p, i64 3 1440 store i32 0, i32* %p3 1441 %p4 = getelementptr i32, i32* %p, i64 4 1442 store i32 0, i32* %p4 1443 %p5 = getelementptr i32, i32* %p, i64 5 1444 store i32 0, i32* %p5 1445 %p6 = getelementptr i32, i32* %p, i64 6 1446 store i32 0, i32* %p6 1447 %p7 = getelementptr i32, i32* %p, i64 7 1448 store i32 0, i32* %p7 1449 ret void 1450} 1451 1452; Like merge_zr32, but with 2-vector type. 1453define void @merge_zr32_2vec(<2 x i32>* %p) { 1454; CHECK-LABEL: merge_zr32_2vec: 1455; CHECK: // %entry 1456; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] 1457; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] 1458; CHECK-NEXT: ret 1459entry: 1460 store <2 x i32> zeroinitializer, <2 x i32>* %p 1461 ret void 1462} 1463 1464; Like merge_zr32, but with 3-vector type. 1465define void @merge_zr32_3vec(<3 x i32>* %p) { 1466; CHECK-LABEL: merge_zr32_3vec: 1467; CHECK: // %entry 1468; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8] 1469; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] 1470; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #4] 1471; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}] 1472; CHECK-NEXT: ret 1473entry: 1474 store <3 x i32> zeroinitializer, <3 x i32>* %p 1475 ret void 1476} 1477 1478; Like merge_zr32, but with 4-vector type. 1479define void @merge_zr32_4vec(<4 x i32>* %p) { 1480; CHECK-LABEL: merge_zr32_4vec: 1481; CHECK: // %entry 1482; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1483; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] 1484; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] 1485; CHECK-NEXT: ret 1486entry: 1487 store <4 x i32> zeroinitializer, <4 x i32>* %p 1488 ret void 1489} 1490 1491; Like merge_zr32, but with 2-vector float type. 1492define void @merge_zr32_2vecf(<2 x float>* %p) { 1493; CHECK-LABEL: merge_zr32_2vecf: 1494; CHECK: // %entry 1495; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] 1496; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] 1497; CHECK-NEXT: ret 1498entry: 1499 store <2 x float> zeroinitializer, <2 x float>* %p 1500 ret void 1501} 1502 1503; Like merge_zr32, but with 4-vector float type. 1504define void @merge_zr32_4vecf(<4 x float>* %p) { 1505; CHECK-LABEL: merge_zr32_4vecf: 1506; CHECK: // %entry 1507; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1508; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] 1509; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] 1510; CHECK-NEXT: ret 1511entry: 1512 store <4 x float> zeroinitializer, <4 x float>* %p 1513 ret void 1514} 1515 1516; Similar to merge_zr32, but for 64-bit values. 1517define void @merge_zr64(i64* %p) { 1518; CHECK-LABEL: merge_zr64: 1519; CHECK: // %entry 1520; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1521; CHECK-NEXT: ret 1522entry: 1523 store i64 0, i64* %p 1524 %p1 = getelementptr i64, i64* %p, i64 1 1525 store i64 0, i64* %p1 1526 ret void 1527} 1528 1529; Similar to merge_zr32, but for 64-bit values and with unaligned stores. 1530define void @merge_zr64_unalign(<2 x i64>* %p) { 1531; CHECK-LABEL: merge_zr64_unalign: 1532; CHECK: // %entry 1533; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1534; STRICTALIGN: strb 1535; STRICTALIGN: strb 1536; STRICTALIGN: strb 1537; STRICTALIGN: strb 1538; STRICTALIGN: strb 1539; STRICTALIGN: strb 1540; STRICTALIGN: strb 1541; STRICTALIGN: strb 1542; STRICTALIGN: strb 1543; STRICTALIGN: strb 1544; STRICTALIGN: strb 1545; STRICTALIGN: strb 1546; STRICTALIGN: strb 1547; STRICTALIGN: strb 1548; STRICTALIGN: strb 1549; STRICTALIGN: strb 1550; CHECK-NEXT: ret 1551entry: 1552 store <2 x i64> zeroinitializer, <2 x i64>* %p, align 1 1553 ret void 1554} 1555 1556; Similar to merge_zr32_3, replaceZeroVectorStore should not split the 1557; vector store since the zero constant vector has multiple uses. 1558define void @merge_zr64_2(i64* %p) { 1559; CHECK-LABEL: merge_zr64_2: 1560; CHECK: // %entry 1561; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 1562; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] 1563; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1564; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #16] 1565; CHECK-NEXT: ret 1566entry: 1567 store i64 0, i64* %p 1568 %p1 = getelementptr i64, i64* %p, i64 1 1569 store i64 0, i64* %p1 1570 %p2 = getelementptr i64, i64* %p, i64 2 1571 store i64 0, i64* %p2 1572 %p3 = getelementptr i64, i64* %p, i64 3 1573 store i64 0, i64* %p3 1574 ret void 1575} 1576 1577; Like merge_zr64, but with 2-vector double type. 1578define void @merge_zr64_2vecd(<2 x double>* %p) { 1579; CHECK-LABEL: merge_zr64_2vecd: 1580; CHECK: // %entry 1581; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1582; CHECK-NEXT: ret 1583entry: 1584 store <2 x double> zeroinitializer, <2 x double>* %p 1585 ret void 1586} 1587 1588; Like merge_zr64, but with 3-vector i64 type. 1589define void @merge_zr64_3vec(<3 x i64>* %p) { 1590; CHECK-LABEL: merge_zr64_3vec: 1591; CHECK: // %entry 1592; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #8] 1593; CHECK-NEXT: str xzr, [x{{[0-9]+}}] 1594; CHECK-NEXT: ret 1595entry: 1596 store <3 x i64> zeroinitializer, <3 x i64>* %p 1597 ret void 1598} 1599 1600; Like merge_zr64_2, but with 4-vector double type. 1601define void @merge_zr64_4vecd(<4 x double>* %p) { 1602; CHECK-LABEL: merge_zr64_4vecd: 1603; CHECK: // %entry 1604; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 1605; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] 1606; CHECK-NEXT: ret 1607entry: 1608 store <4 x double> zeroinitializer, <4 x double>* %p 1609 ret void 1610} 1611 1612; Verify that non-consecutive merges do not generate q0 1613define void @merge_multiple_128bit_stores(i64* %p) { 1614; CHECK-LABEL: merge_multiple_128bit_stores 1615; CHECK: // %entry 1616; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 1617; NOSTRICTALIGN-NEXT: str q0, [x0] 1618; NOSTRICTALIGN-NEXT: stur q0, [x0, #24] 1619; NOSTRICTALIGN-NEXT: str q0, [x0, #48] 1620; STRICTALIGN-NEXT: stp xzr, xzr, [x0] 1621; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #24] 1622; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #48] 1623; CHECK-NEXT: ret 1624entry: 1625 store i64 0, i64* %p 1626 %p1 = getelementptr i64, i64* %p, i64 1 1627 store i64 0, i64* %p1 1628 %p3 = getelementptr i64, i64* %p, i64 3 1629 store i64 0, i64* %p3 1630 %p4 = getelementptr i64, i64* %p, i64 4 1631 store i64 0, i64* %p4 1632 %p6 = getelementptr i64, i64* %p, i64 6 1633 store i64 0, i64* %p6 1634 %p7 = getelementptr i64, i64* %p, i64 7 1635 store i64 0, i64* %p7 1636 ret void 1637} 1638 1639; Verify that large stores generate stp q 1640define void @merge_multiple_128bit_stores_consec(i64* %p) { 1641; CHECK-LABEL: merge_multiple_128bit_stores_consec 1642; CHECK: // %entry 1643; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 1644; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] 1645; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}, #32] 1646; STRICTALIGN-NEXT: stp xzr, xzr, [x0] 1647; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #16] 1648; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #32] 1649; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #48] 1650; CHECK-NEXT: ret 1651entry: 1652 store i64 0, i64* %p 1653 %p1 = getelementptr i64, i64* %p, i64 1 1654 store i64 0, i64* %p1 1655 %p2 = getelementptr i64, i64* %p, i64 2 1656 store i64 0, i64* %p2 1657 %p3 = getelementptr i64, i64* %p, i64 3 1658 store i64 0, i64* %p3 1659 %p4 = getelementptr i64, i64* %p, i64 4 1660 store i64 0, i64* %p4 1661 %p5 = getelementptr i64, i64* %p, i64 5 1662 store i64 0, i64* %p5 1663 %p6 = getelementptr i64, i64* %p, i64 6 1664 store i64 0, i64* %p6 1665 %p7 = getelementptr i64, i64* %p, i64 7 1666 store i64 0, i64* %p7 1667 ret void 1668} 1669 1670; Check for bug 34674 where invalid add of xzr was being generated. 1671; CHECK-LABEL: bug34674: 1672; CHECK: // %entry 1673; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr 1674; CHECK-DAG: stp xzr, xzr, [x0] 1675; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1 1676define i64 @bug34674(<2 x i64>* %p) { 1677entry: 1678 store <2 x i64> zeroinitializer, <2 x i64>* %p 1679 %p2 = bitcast <2 x i64>* %p to i64* 1680 %ld = load i64, i64* %p2 1681 %add = add i64 %ld, 1 1682 ret i64 %add 1683} 1684 1685; CHECK-LABEL: trunc_splat_zero: 1686; CHECK-DAG: strh wzr, [x0] 1687define void @trunc_splat_zero(<2 x i8>* %ptr) { 1688 store <2 x i8> zeroinitializer, <2 x i8>* %ptr, align 2 1689 ret void 1690} 1691 1692; CHECK-LABEL: trunc_splat: 1693; CHECK: mov [[VAL:w[0-9]+]], #42 1694; CHECK: movk [[VAL]], #42, lsl #16 1695; CHECK: str [[VAL]], [x0] 1696define void @trunc_splat(<2 x i16>* %ptr) { 1697 store <2 x i16> <i16 42, i16 42>, <2 x i16>* %ptr, align 4 1698 ret void 1699} 1700