; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 | FileCheck %s

; Verify that, for architectures known to have poor-latency double-precision
; shift instructions, we generate an alternative sequence of lower-latency
; instructions instead of the shrd instruction.
;
; Every function below computes (a >> N) | (b << (64 - N)) on i64 values.
; The two shifted values can never share a set bit (the right shift clears
; the top N bits, the left shift clears the low 64 - N bits), so the 'or'
; is arithmetically equivalent to an add; the assertions expect the two
; halves to be combined with leaq.

;uint64_t rshift1(uint64_t a, uint64_t b)
;{
;    return (a >> 1) | (b << 63);
;}

; N = 1: the assertions expect the one-operand form of shrq (shift by one)
; on %rdi, shlq $63 on %rsi, and a leaq that sums the two registers.
define i64 @rshift1(i64 %a, i64 %b) nounwind readnone uwtable {
; CHECK-LABEL: rshift1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shrq %rdi
; CHECK-NEXT:    shlq $63, %rsi
; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
; CHECK-NEXT:    retq
  %1 = lshr i64 %a, 1
  %2 = shl i64 %b, 63
  %3 = or i64 %2, %1
  ret i64 %3
}

;uint64_t rshift2(uint64_t a, uint64_t b)
;{
;    return (a >> 2) | (b << 62);
;}

; N = 2: same shape as rshift1, with explicit immediates on both shifts
; (shrq $2 / shlq $62) followed by the combining leaq.
define i64 @rshift2(i64 %a, i64 %b) nounwind readnone uwtable {
; CHECK-LABEL: rshift2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    shlq $62, %rsi
; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
; CHECK-NEXT:    retq
  %1 = lshr i64 %a, 2
  %2 = shl i64 %b, 62
  %3 = or i64 %2, %1
  ret i64 %3
}

;uint64_t rshift7(uint64_t a, uint64_t b)
;{
;    return (a >> 7) | (b << 57);
;}

; N = 7: a shift amount that is neither 1 nor a small power of two; the
; expected sequence is still shrq $7 / shlq $57 / leaq.
define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
; CHECK-LABEL: rshift7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shrq $7, %rdi
; CHECK-NEXT:    shlq $57, %rsi
; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
; CHECK-NEXT:    retq
  %1 = lshr i64 %a, 7
  %2 = shl i64 %b, 57
  %3 = or i64 %2, %1
  ret i64 %3
}

;uint64_t rshift63(uint64_t a, uint64_t b)
;{
;    return (a >> 63) | (b << 1);
;}

; N = 63: here the left shift is by 1, and the assertions expect no separate
; shlq at all; the multiply-by-two is carried by the scale factor of the
; leaq index operand, i.e. (%rdi,%rsi,2).
define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
; CHECK-LABEL: rshift63:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shrq $63, %rdi
; CHECK-NEXT:    leaq (%rdi,%rsi,2), %rax
; CHECK-NEXT:    retq
  %1 = lshr i64 %a, 63
  %2 = shl i64 %b, 1
  %3 = or i64 %2, %1
  ret i64 %3
}