; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 | FileCheck %s

; Verify that, for architectures known to have high-latency double-precision
; shift instructions, we generate an alternative sequence of lower-latency
; instructions instead of the shrd instruction. The RUN line above selects
; -mcpu=bdver1 as the target CPU for this check.
;
; Each function below is the IR form of the C snippet shown in the comment that
; precedes it. Every CHECK group is anchored with CHECK-LABEL so that a failed
; match in one function cannot be satisfied by the output of a later function.

; uint64_t rshift1(uint64_t a, uint64_t b)
; {
;   return (a >> 1) | (b << 63);
; }

; Expected: a separate shrq and shlq whose results are combined with leaq.
; CHECK-LABEL: rshift1:
; CHECK: shrq {{.*}}
; CHECK-NEXT: shlq $63, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @rshift1(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 1
  %2 = shl i64 %b, 63
  %3 = or i64 %2, %1
  ret i64 %3
}

; uint64_t rshift2(uint64_t a, uint64_t b)
; {
;   return (a >> 2) | (b << 62);
; }

; Expected: a separate shrq and shlq whose results are combined with leaq.
; CHECK-LABEL: rshift2:
; CHECK: shrq $2, {{.*}}
; CHECK-NEXT: shlq $62, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @rshift2(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 2
  %2 = shl i64 %b, 62
  %3 = or i64 %2, %1
  ret i64 %3
}

; uint64_t rshift7(uint64_t a, uint64_t b)
; {
;   return (a >> 7) | (b << 57);
; }

; Expected: a separate shrq and shlq whose results are combined with leaq.
; CHECK-LABEL: rshift7:
; CHECK: shrq $7, {{.*}}
; CHECK-NEXT: shlq $57, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 7
  %2 = shl i64 %b, 57
  %3 = or i64 %2, %1
  ret i64 %3
}

; uint64_t rshift63(uint64_t a, uint64_t b)
; {
;   return (a >> 63) | (b << 1);
; }

; Expected: only a shrq is emitted as a shift; the left shift by 1 appears as
; the scale factor of 2 in the leaq addressing mode.
; CHECK-LABEL: rshift63:
; CHECK: shrq $63, %rdi
; CHECK-NEXT: leaq (%rdi,%rsi,2), %rax

define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 63
  %2 = shl i64 %b, 1
  %3 = or i64 %2, %1
  ret i64 %3
}