; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s

; clang -Oz -c test1.cpp -emit-llvm -S
; Verify that we generate the shld instruction when we are optimizing for size,
; even for X86_64 processors that are known to have poor latency double
; precision shift instructions.
; uint64_t lshift10(uint64_t a, uint64_t b)
; {
;     return (a << 10) | (b >> 54);
; }
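; For reference, x86 SHLD shifts the destination left and fills the vacated
; low bits from the top of the source: "shldq $n, %src, %dst" computes
; dst = (dst << n) | (src >> (64 - n)). With a in %rax and b in %rsi,
; "shldq $10, %rsi, %rax" therefore matches (a << 10) | (b >> 54) exactly.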

; Function Attrs: minsize nounwind readnone uwtable
define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 {
; CHECK-LABEL: _Z8lshift10mm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shldq $10, %rsi, %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 10
  %shr = lshr i64 %b, 54
  %or = or i64 %shr, %shl
  ret i64 %or
}

attributes #0 = { minsize nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
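; Note: -Oz attaches minsize (attributes #0 above), and hasOptSize() treats
; minsize functions as optimize-for-size, so isel favors the single shld
; even though bdver1 models SHLD as slow.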


; clang -Os -c test2.cpp -emit-llvm -S
; Verify that we generate the shld instruction when we are optimizing for size,
; even for X86_64 processors that are known to have poor latency double
; precision shift instructions.
; uint64_t lshift11(uint64_t a, uint64_t b)
; {
;     return (a << 11) | (b >> 53);
; }

; Function Attrs: nounwind optsize readnone uwtable
define i64 @_Z8lshift11mm(i64 %a, i64 %b) #1 {
; CHECK-LABEL: _Z8lshift11mm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shldq $11, %rsi, %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 11
  %shr = lshr i64 %b, 53
  %or = or i64 %shr, %shl
  ret i64 %or
}

define i64 @_Z8lshift11mm_pgso(i64 %a, i64 %b) !prof !14 {
; CHECK-LABEL: _Z8lshift11mm_pgso:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shldq $11, %rsi, %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 11
  %shr = lshr i64 %b, 53
  %or = or i64 %shr, %shl
  ret i64 %or
}
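; The _pgso variant carries no optsize/minsize attribute. Instead, its
; !prof !14 entry count of 0, evaluated against the ProfileSummary module
; metadata below, marks the function cold, and profile-guided size
; optimization (PGSO) then optimizes it for size, so shld is used here too.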

attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

; clang -O2 -c test3.cpp -emit-llvm -S
; Verify that we do not generate the shld instruction when we are not optimizing
; for size for X86_64 processors that are known to have poor latency double
; precision shift instructions.
; uint64_t lshift12(uint64_t a, uint64_t b)
; {
;     return (a << 12) | (b >> 52);
; }

; Function Attrs: nounwind readnone uwtable
define i64 @_Z8lshift12mm(i64 %a, i64 %b) #2 {
; CHECK-LABEL: _Z8lshift12mm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    shrq $52, %rsi
; CHECK-NEXT:    shlq $12, %rdi
; CHECK-NEXT:    leaq (%rdi,%rsi), %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 12
  %shr = lshr i64 %b, 52
  %or = or i64 %shr, %shl
  ret i64 %or
}
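; At -O2 with no size attribute and no cold profile, bdver1's slow-SHLD
; subtarget flag steers lowering away from shld: the two shifts are emitted
; independently, and since their results have disjoint bits the or is
; equivalent to an add and folds into the leaq above.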

attributes #2 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

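; Profile summary consumed by PGSO. Each DetailedSummary entry is a
; {cutoff, minimum count, number of counts} triple; against these thresholds
; the function_entry_count of 0 in !14 classifies @_Z8lshift11mm_pgso as cold.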
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}