• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 | FileCheck %s
; Verify that for architectures known to have high-latency double-precision
; shift instructions, we generate an alternative sequence of lower-latency
; instructions instead of the shrd instruction.
5
; C source for the pattern under test:
;uint64_t rshift1(uint64_t a, uint64_t b)
;{
;    return (a >> 1) | (b << 63);
;}
;
; The two shift amounts sum to 64, i.e. this is a double-precision right
; shift by 1. The expected output is a shrq/shlq/leaq sequence; the shrq
; pattern does not pin an immediate for the shift-by-1 form.
; The label directive below anchors the following patterns to this
; function's output so they cannot match code from a neighbouring function.

; CHECK-LABEL:       rshift1:
; CHECK:             shrq    {{.*}}
; CHECK-NEXT:        shlq    $63, {{.*}}
; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}

define i64 @rshift1(i64 %a, i64 %b) nounwind readnone uwtable {
  %lo = lshr i64 %a, 1
  %hi = shl i64 %b, 63
  %res = or i64 %hi, %lo
  ret i64 %res
}
22
; C source for the pattern under test:
;uint64_t rshift2(uint64_t a, uint64_t b)
;{
;    return (a >> 2) | (b << 62);
;}
;
; The two shift amounts sum to 64, i.e. this is a double-precision right
; shift by 2. The expected output is a shrq/shlq/leaq sequence.
; The label directive below anchors the following patterns to this
; function's output so they cannot match code from a neighbouring function.

; CHECK-LABEL:       rshift2:
; CHECK:             shrq    $2, {{.*}}
; CHECK-NEXT:        shlq    $62, {{.*}}
; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}

define i64 @rshift2(i64 %a, i64 %b) nounwind readnone uwtable {
  %lo = lshr i64 %a, 2
  %hi = shl i64 %b, 62
  %res = or i64 %hi, %lo
  ret i64 %res
}
40
; C source for the pattern under test:
;uint64_t rshift7(uint64_t a, uint64_t b)
;{
;    return (a >> 7) | (b << 57);
;}
;
; The two shift amounts sum to 64, i.e. this is a double-precision right
; shift by 7. The expected output is a shrq/shlq/leaq sequence.
; The label directive below anchors the following patterns to this
; function's output so they cannot match code from a neighbouring function.

; CHECK-LABEL:       rshift7:
; CHECK:             shrq    $7, {{.*}}
; CHECK-NEXT:        shlq    $57, {{.*}}
; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}

define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
  %lo = lshr i64 %a, 7
  %hi = shl i64 %b, 57
  %res = or i64 %hi, %lo
  ret i64 %res
}
58
; C source for the pattern under test:
;uint64_t rshift63(uint64_t a, uint64_t b)
;{
;    return (a >> 63) | (b << 1);
;}
;
; The two shift amounts sum to 64, i.e. this is a double-precision right
; shift by 63. Here the expected output is only two instructions: a shrq
; by 63 on %rdi followed by a leaq that uses a scale of 2 on %rsi, so no
; separate shlq is expected for the left shift by 1.

; CHECK-LABEL:       rshift63:
; CHECK:             shrq    $63, %rdi
; CHECK-NEXT:        leaq    (%rdi,%rsi,2), %rax

define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
  %lo = lshr i64 %a, 63
  %hi = shl i64 %b, 1
  %res = or i64 %hi, %lo
  ret i64 %res
}
74