; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s

; clang -Oz -c test1.cpp -emit-llvm -S
; Verify that we generate shld instruction when we are optimizing for size,
; even for X86_64 processors that are known to have poor latency double
; precision shift instructions.
; uint64_t lshift10(uint64_t a, uint64_t b)
; {
;   return (a << 10) | (b >> 54);
; }

; Function Attrs: minsize nounwind readnone uwtable
define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 {
; CHECK-LABEL: _Z8lshift10mm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shldq $10, %rsi, %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 10
  %shr = lshr i64 %b, 54
  %or = or i64 %shr, %shl
  ret i64 %or
}

attributes #0 = { minsize nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

; clang -Os -c test2.cpp -emit-llvm -S
; Verify that we generate shld instruction when we are optimizing for size,
; even for X86_64 processors that are known to have poor latency double
; precision shift instructions.
; uint64_t lshift11(uint64_t a, uint64_t b)
; {
;   return (a << 11) | (b >> 53);
; }

; Function Attrs: nounwind optsize readnone uwtable
define i64 @_Z8lshift11mm(i64 %a, i64 %b) #1 {
; CHECK-LABEL: _Z8lshift11mm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shldq $11, %rsi, %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 11
  %shr = lshr i64 %b, 53
  %or = or i64 %shr, %shl
  ret i64 %or
}

; PGSO variant: no optsize/minsize attribute; the !prof entry count (!14)
; marks this function cold, so profile-guided size optimization still
; selects shld.
define i64 @_Z8lshift11mm_pgso(i64 %a, i64 %b) !prof !14 {
; CHECK-LABEL: _Z8lshift11mm_pgso:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shldq $11, %rsi, %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 11
  %shr = lshr i64 %b, 53
  %or = or i64 %shr, %shl
  ret i64 %or
}

attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

; clang -O2 -c test2.cpp -emit-llvm -S
; Verify that we do not generate shld instruction when we are not optimizing
; for size for X86_64 processors that are known to have poor latency double
; precision shift instructions.
; uint64_t lshift12(uint64_t a, uint64_t b)
; {
;   return (a << 12) | (b >> 52);
; }

; Function Attrs: nounwind readnone uwtable
define i64 @_Z8lshift12mm(i64 %a, i64 %b) #2 {
; CHECK-LABEL: _Z8lshift12mm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    shrq $52, %rsi
; CHECK-NEXT:    shlq $12, %rdi
; CHECK-NEXT:    leaq (%rdi,%rsi), %rax
; CHECK-NEXT:    retq
entry:
  %shl = shl i64 %a, 12
  %shr = lshr i64 %b, 52
  %or = or i64 %shr, %shl
  ret i64 %or
}

attributes #2 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
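; The ProfileSummary module flag (!0-!13) is what lets profile-guided size
; optimization classify functions by hotness; !14 records a
; function_entry_count of 0 for @_Z8lshift11mm_pgso, which classifies it as
; cold and therefore optimized for size even without optsize/minsize.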