; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s

; Codegen tests for arm64 load pairing: two loads from neighbouring addresses
; are expected to be emitted as a single ldp (or ldpsw when both results are
; sign-extended from i32 to i64).

; Two i32 loads from %p and %p + 1 element.
; CHECK-LABEL: ldp_int:
; CHECK: ldp
define i32 @ldp_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %add = add nsw i32 %tmp1, %tmp
  ret i32 %add
}

; Both i32 results are sign-extended to i64, so ldpsw is expected.
; CHECK-LABEL: ldp_sext_int:
; CHECK: ldpsw
define i64 @ldp_sext_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Only the first loaded value (at %p) is sign-extended; the second one is
; zero-extended. A plain ldp of w registers is expected, followed by an sxtw
; of the first destination register.
; CHECK-LABEL: ldp_half_sext_res0_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST1]], w[[DST1]]
define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %zexttmp1 = zext i32 %tmp1 to i64
  %add = add nsw i64 %zexttmp1, %sexttmp
  ret i64 %add
}

; Mirror image of the previous test: only the second loaded value (at
; %p + 1 element) is sign-extended, so the sxtw is expected on the second
; destination register of the ldp.
; CHECK-LABEL: ldp_half_sext_res1_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST2]], w[[DST2]]
define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %zexttmp = zext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %zexttmp
  ret i64 %add
}


; Two i64 loads from %p and %p + 1 element.
; CHECK-LABEL: ldp_long:
; CHECK: ldp
define i64 @ldp_long(i64* %p) nounwind {
  %tmp = load i64, i64* %p, align 8
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  %tmp1 = load i64, i64* %add.ptr, align 8
  %add = add nsw i64 %tmp1, %tmp
  ret i64 %add
}

; Two float loads from %p and %p + 1 element.
; CHECK-LABEL: ldp_float:
; CHECK: ldp
define float @ldp_float(float* %p) nounwind {
  %tmp = load float, float* %p, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  %tmp1 = load float, float* %add.ptr, align 4
  %add = fadd float %tmp, %tmp1
  ret float %add
}

; Two double loads from %p and %p + 1 element.
; CHECK-LABEL: ldp_double:
; CHECK: ldp
define double @ldp_double(double* %p) nounwind {
  %tmp = load double, double* %p, align 8
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  %tmp1 = load double, double* %add.ptr, align 8
  %add = fadd double %tmp, %tmp1
  ret double %add
}

; Two <2 x double> (16-byte) vector loads from %p and %p + 1 element.
; CHECK-LABEL: ldp_doublex2:
; CHECK: ldp
define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind {
  %tmp = load <2 x double>, <2 x double>* %p, align 16
  %add.ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 1
  %tmp1 = load <2 x double>, <2 x double>* %add.ptr, align 16
  %add = fadd <2 x double> %tmp, %tmp1
  ret <2 x double> %add
}

; Test the load/store optimizer: combine ldurs into an ldp, if appropriate.
; The loads below use negative element indices (-1 and -2), and the patterns
; expect one ldp addressed at the lower of the two byte offsets.
define i32 @ldur_int(i32* %a) nounwind {
; CHECK-LABEL: ldur_int:
; CHECK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
; CHECK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %tmp3 = add i32 %tmp1, %tmp2
  ret i32 %tmp3
}

; Same as ldur_int, but both results are sign-extended, so ldpsw is expected.
define i64 @ldur_sext_int(i32* %a) nounwind {
; CHECK-LABEL: ldur_sext_int:
; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}
; Half-sign-extended ldur pairing: the value loaded from the lower address
; (element -2, first ldp destination) is sign-extended and the other one is
; zero-extended. A plain ldp of w registers plus an sxtw of the first
; destination register is expected.
define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res0:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; CHECK: sxtw x[[DST1]], w[[DST1]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %zexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %zexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Mirror image: the value loaded from the higher address (element -1, second
; ldp destination) is the sign-extended one.
define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res1:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; CHECK: sxtw x[[DST2]], w[[DST2]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %zexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %zexttmp2
  ret i64 %tmp3
}


; i64 loads at elements -1 and -2 (byte offsets -8 and -16).
define i64 @ldur_long(i64* %a) nounwind ssp {
; CHECK-LABEL: ldur_long:
; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -1
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -2
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; float loads at elements -1 and -2 (byte offsets -4 and -8).
define float @ldur_float(float* %a) {
; CHECK-LABEL: ldur_float:
; CHECK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
; CHECK-NEXT: fadd s{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds float, float* %a, i64 -1
  %tmp1 = load float, float* %p1, align 2
  %p2 = getelementptr inbounds float, float* %a, i64 -2
  %tmp2 = load float, float* %p2, align 2
  %tmp3 = fadd float %tmp1, %tmp2
  ret float %tmp3
}

; double loads at elements -1 and -2 (byte offsets -8 and -16).
define double @ldur_double(double* %a) {
; CHECK-LABEL: ldur_double:
; CHECK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
; CHECK-NEXT: fadd d{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds double, double* %a, i64 -1
  %tmp1 = load double, double* %p1, align 2
  %p2 = getelementptr inbounds double, double* %a, i64 -2
  %tmp2 = load double, double* %p2, align 2
  %tmp3 = fadd double %tmp1, %tmp2
  ret double %tmp3
}

; <2 x double> loads at elements -1 and -2 (byte offsets -16 and -32).
define <2 x double> @ldur_doublex2(<2 x double>* %a) {
; CHECK-LABEL: ldur_doublex2:
; CHECK: ldp q[[DST1:[0-9]+]], q[[DST2:[0-9]+]], [x0, #-32]
; CHECK-NEXT: fadd v{{[0-9]+}}.2d, v[[DST2]].2d, v[[DST1]].2d
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -1
  %tmp1 = load <2 x double>, <2 x double>* %p1, align 2
  %p2 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -2
  %tmp2 = load <2 x double>, <2 x double>* %p2, align 2
  %tmp3 = fadd <2 x double> %tmp1, %tmp2
  ret <2 x double> %tmp3
}

; Now check some boundary conditions.
; The "BarelyIn" tests place the lower load at byte offset -256 and expect it
; to be paired; the "BarelyOut" tests go one element further and expect no ldp.
; NOTE(review): -256 looks like the lower bound of the 9-bit unscaled (ldur)
; offset range -- confirm against the architecture reference.

; i64 loads at elements -31 and -32 (byte offsets -248 and -256).
define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyIn:
; CHECK-NOT: ldur
; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -31
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 loads at elements -63 and -64 (byte offsets -252 and -256), both
; sign-extended.
define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInSext:
; CHECK-NOT: ldur
; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Same offsets as pairUpBarelyInSext, but only the value from the lower
; address (first ldp destination) is sign-extended.
define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes0:
; CHECK-NOT: ldur
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; CHECK: sxtw x[[DST1]], w[[DST1]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %zexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %zexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Same offsets again, but only the value from the higher address (second ldp
; destination) is sign-extended.
define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes1:
; CHECK-NOT: ldur
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; CHECK: sxtw x[[DST2]], w[[DST2]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %zexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %zexttmp2
  ret i64 %tmp3
}

; i64 loads at elements -32 and -33 (byte offsets -256 and -264).
define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyOut:
; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just verify that there isn't an ldp before the add.
; CHECK: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -33
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 loads at elements -64 and -65 (byte offsets -256 and -260), both
; sign-extended.
define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyOutSext:
; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just verify that there isn't an ldp before the add.
; CHECK: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -65
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Each address is an i64 element address (elements -18 and -17) bumped by one
; byte, giving byte offsets -143 and -135, which are not multiples of the
; 8-byte access size. No ldp is expected; two ldur instructions are.
define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAligned:
; CHECK-NOT: ldp
; CHECK: ldur
; CHECK-NEXT: ldur
; CHECK-NEXT: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -18
  %bp1 = bitcast i64* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i64*
  %tmp1 = load i64, i64* %dp1, align 1

  %p2 = getelementptr inbounds i64, i64* %a, i64 -17
  %bp2 = bitcast i64* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i64*
  %tmp2 = load i64, i64* %dp2, align 1

  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 variant of pairUpNotAligned with sign-extended results: byte offsets
; -71 and -67 are not multiples of the 4-byte access size. No ldp is
; expected; two ldursw instructions are.
define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAlignedSext:
; CHECK-NOT: ldp
; CHECK: ldursw
; CHECK-NEXT: ldursw
; CHECK-NEXT: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -18
  %bp1 = bitcast i32* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i32*
  %tmp1 = load i32, i32* %dp1, align 1

  %p2 = getelementptr inbounds i32, i32* %a, i64 -17
  %bp2 = bitcast i32* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i32*
  %tmp2 = load i32, i32* %dp2, align 1

  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; External function used by the two tests below to give the adjusted pointer
; an additional use besides the loads.
declare void @use-ptr(i32*)

; The pointer %p + 2 elements (8 bytes) is passed to @use-ptr before the two
; sign-extending loads that start at it. The pattern expects an ldpsw that
; addresses a base register plus #8.
; CHECK-LABEL: ldp_sext_int_pre:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
define i64 @ldp_sext_int_pre(i32* %p) nounwind {
  %ptr = getelementptr inbounds i32, i32* %p, i64 2
  call void @use-ptr(i32* %ptr)
  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
  %tmp = load i32, i32* %add.ptr, align 4
  %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
  %tmp1 = load i32, i32* %add.ptr1, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; The two sign-extending loads come first, and the pointer advanced past them
; (%p + 2 elements, 8 bytes) is then passed to @use-ptr. The pattern expects
; the post-indexed form of ldpsw, with x0 as base and an increment of #8.
; CHECK-LABEL: ldp_sext_int_post:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
define i64 @ldp_sext_int_post(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
  call void @use-ptr(i32* %ptr)
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}