; REQUIRES: asserts
; The regression tests need to test for the order of emitted instructions and
; are therefore a bit fragile/reliant on instruction scheduling. The test cases
; have been minimized as much as possible, but most of them could still break
; if the instruction scheduling heuristics for cortex-a53 change.
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=1 -stats 2>&1 \
; RUN: | FileCheck %s --check-prefix CHECK
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=0 -stats 2>&1 \
; RUN: | FileCheck %s --check-prefix CHECK-NOWORKAROUND
; The following run lines are just to verify whether or not this pass runs by
; default for the given CPUs. Given the fragility of the tests, this is only
; checked on a test case where the scheduler has no freedom at all to reschedule
; the instructions, so potentially very different scheduling heuristics will
; not break the test case.
; RUN: llc < %s -mcpu=generic | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cortex-a53 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cortex-a57 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cyclone | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

define i64 @f_load_madd_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_load_madd_64:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_load_madd_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: madd
; CHECK-BASIC-PASS-DISABLED-LABEL: f_load_madd_64:
; CHECK-BASIC-PASS-DISABLED: ldr
; CHECK-BASIC-PASS-DISABLED-NEXT: madd


define i32 @f_load_madd_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_load_madd_32:
; CHECK: ldr
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_load_madd_32:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: madd


define i64 @f_load_msub_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_load_msub_64:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_load_msub_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: msub


define i32 @f_load_msub_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_load_msub_32:
; CHECK: ldr
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_load_msub_32:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: msub


define i64 @f_load_mul_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_load_mul_64:
; CHECK: ldr
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_load_mul_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: mul

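; Editorial note (an observation of the checks above, not an extra test): the
; workaround nop is only expected after a memory operation that is followed by
; a 64-bit multiply-accumulate (madd/msub); the 32-bit variants and the
; non-accumulating multiplies below must not get a nop.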
define i32 @f_load_mul_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_load_mul_32:
; CHECK: ldr
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_load_mul_32:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: mul


define i64 @f_load_mneg_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = sub i64 0, %b
  %sub = mul i64 %0, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_load_mneg_64:
; CHECK-NOWORKAROUND-LABEL: f_load_mneg_64:
; FIXME: only add further checks here once LLVM actually produces
;        mneg instructions
; FIXME-CHECK: ldr
; FIXME-CHECK-NEXT: nop
; FIXME-CHECK-NEXT: mneg
; FIXME-CHECK-NOWORKAROUND: ldr
; FIXME-CHECK-NOWORKAROUND-NEXT: mneg


define i32 @f_load_mneg_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = sub i32 0, %b
  %sub = mul i32 %0, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_load_mneg_32:
; CHECK-NOWORKAROUND-LABEL: f_load_mneg_32:
; FIXME: only add further checks here once LLVM actually produces
;        mneg instructions
; FIXME-CHECK: ldr
; FIXME-CHECK-NEXT: mneg
; FIXME-CHECK-NOWORKAROUND: ldr
; FIXME-CHECK-NOWORKAROUND-NEXT: mneg


define i64 @f_load_smaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, %a
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %add3 = add nsw i64 %add, %conv2
  ret i64 %add3
}
; CHECK-LABEL: f_load_smaddl:
; CHECK: ldrsw
; CHECK-NEXT: nop
; CHECK-NEXT: smaddl
; CHECK-NOWORKAROUND-LABEL: f_load_smaddl:
; CHECK-NOWORKAROUND: ldrsw
; CHECK-NOWORKAROUND-NEXT: smaddl


define i64 @f_load_smsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %sub = sub i64 %a, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %add = add nsw i64 %sub, %conv2
  ret i64 %add
}
; CHECK-LABEL: f_load_smsubl_64:
; CHECK: ldrsw
; CHECK-NEXT: nop
; CHECK-NEXT: smsubl
; CHECK-NOWORKAROUND-LABEL: f_load_smsubl_64:
; CHECK-NOWORKAROUND: ldrsw
; CHECK-NOWORKAROUND-NEXT: smsubl


define i64 @f_load_smull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %div = sdiv i64 %mul, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_smull:
; CHECK: ldrsw
; CHECK-NEXT: smull
; CHECK-NOWORKAROUND-LABEL: f_load_smull:
; CHECK-NOWORKAROUND: ldrsw
; CHECK-NOWORKAROUND-NEXT: smull


define i64 @f_load_smnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = sub nsw i64 0, %conv
  %sub = mul i64 %conv1, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %div = sdiv i64 %sub, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_smnegl_64:
; CHECK-NOWORKAROUND-LABEL: f_load_smnegl_64:
; FIXME: only add further checks here once LLVM actually produces
;        smnegl instructions

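; The unsigned widening cases below mirror the signed ones above: per their
; checks, umaddl/umsubl after a load expect the workaround nop, while umull
; (no accumulator) expects none.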
define i64 @f_load_umaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %add = add i64 %mul, %a
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %add3 = add i64 %add, %conv2
  ret i64 %add3
}
; CHECK-LABEL: f_load_umaddl:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: umaddl
; CHECK-NOWORKAROUND-LABEL: f_load_umaddl:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: umaddl


define i64 @f_load_umsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %sub = sub i64 %a, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %add = add i64 %sub, %conv2
  ret i64 %add
}
; CHECK-LABEL: f_load_umsubl_64:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: umsubl
; CHECK-NOWORKAROUND-LABEL: f_load_umsubl_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: umsubl


define i64 @f_load_umull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %div = udiv i64 %mul, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_umull:
; CHECK: ldr
; CHECK-NEXT: umull
; CHECK-NOWORKAROUND-LABEL: f_load_umull:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: umull


define i64 @f_load_umnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = sub nsw i64 0, %conv
  %sub = mul i64 %conv1, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %div = udiv i64 %sub, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_umnegl_64:
; CHECK-NOWORKAROUND-LABEL: f_load_umnegl_64:
; FIXME: only add further checks here once LLVM actually produces
;        umnegl instructions


define i64 @f_store_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_store_madd_64:
; CHECK: str
; CHECK-NEXT: nop
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_store_madd_64:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: madd


define i32 @f_store_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_store_madd_32:
; CHECK: str
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_store_madd_32:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: madd


define i64 @f_store_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_store_msub_64:
; CHECK: str
; CHECK-NEXT: nop
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_store_msub_64:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: msub

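; As with the load-based cases, the checks show that only the 64-bit
; store-fed multiply-accumulate variants expect a nop; the 32-bit and
; non-accumulating store cases below do not.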
define i32 @f_store_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_store_msub_32:
; CHECK: str
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_store_msub_32:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: msub


define i64 @f_store_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_store_mul_64:
; CHECK: str
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_store_mul_64:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: mul


define i32 @f_store_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_store_mul_32:
; CHECK: str
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_store_mul_32:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: mul


define i64 @f_prefetch_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_prefetch_madd_64:
; CHECK: prfm
; CHECK-NEXT: nop
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_64:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: madd

declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) #2

define i32 @f_prefetch_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 0, i32 1)
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_prefetch_madd_32:
; CHECK: prfm
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_32:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: madd

define i64 @f_prefetch_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 1, i32 1)
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_prefetch_msub_64:
; CHECK: prfm
; CHECK-NEXT: nop
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_64:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: msub

define i32 @f_prefetch_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 1, i32 1)
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_prefetch_msub_32:
; CHECK: prfm
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_32:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: msub

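; Per the f_prefetch_madd_64/f_prefetch_msub_64 checks above, a prfm also
; counts as the memory operation of the sequence; the non-accumulating
; multiplies below still expect no nop after it.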
define i64 @f_prefetch_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_prefetch_mul_64:
; CHECK: prfm
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_64:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: mul

define i32 @f_prefetch_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 3, i32 1)
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_prefetch_mul_32:
; CHECK: prfm
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_32:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: mul

define i64 @fall_through(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  br label %block1

block1:
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  %tmp = ptrtoint i8* blockaddress(@fall_through, %block1) to i64
  %ret = add nsw i64 %tmp, %add
  ret i64 %ret
}
; CHECK-LABEL: fall_through
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: .Ltmp
; CHECK-NEXT: BB
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: fall_through
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: .Ltmp
; CHECK-NOWORKAROUND-NEXT: BB
; CHECK-NOWORKAROUND-NEXT: madd

; No checks for this, just check that it doesn't crash.
define i32 @crash_check(i8** nocapture readnone %data) #0 {
entry:
  br label %while.cond

while.cond:
  br label %while.cond
}

attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }


; CHECK-LABEL: ... Statistics Collected ...
; CHECK: 11 aarch64-fix-cortex-a53-835769 - Number of Nops added to work around erratum 835769
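; Note: the 11 nops in the statistic above correspond to the eleven test
; cases in this file whose checks expect a "nop" line (the load, store, and
; prefetch madd/msub 64-bit variants, smaddl/smsubl/umaddl/umsubl, and
; fall_through).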