1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "Latency.h"
11
12 #include "Assembler.h"
13 #include "BenchmarkRunner.h"
14 #include "MCInstrDescView.h"
15 #include "PerfHelper.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstBuilder.h"
19 #include "llvm/Support/FormatVariadic.h"
20
21 namespace exegesis {
22
hasUnknownOperand(const llvm::MCOperandInfo & OpInfo)23 static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
24 return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN;
25 }
26
27 // FIXME: Handle memory, see PR36905.
hasMemoryOperand(const llvm::MCOperandInfo & OpInfo)28 static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) {
29 return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY;
30 }
31
32 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
33
isInfeasible(const llvm::MCInstrDesc & MCInstrDesc) const34 llvm::Error LatencyBenchmarkRunner::isInfeasible(
35 const llvm::MCInstrDesc &MCInstrDesc) const {
36 if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand))
37 return llvm::make_error<BenchmarkFailure>(
38 "Infeasible : has unknown operands");
39 if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand))
40 return llvm::make_error<BenchmarkFailure>(
41 "Infeasible : has memory operands");
42 return llvm::Error::success();
43 }
44
45 llvm::Expected<SnippetPrototype>
generateTwoInstructionPrototype(const Instruction & Instr) const46 LatencyBenchmarkRunner::generateTwoInstructionPrototype(
47 const Instruction &Instr) const {
48 std::vector<unsigned> Opcodes;
49 Opcodes.resize(State.getInstrInfo().getNumOpcodes());
50 std::iota(Opcodes.begin(), Opcodes.end(), 0U);
51 std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
52 for (const unsigned OtherOpcode : Opcodes) {
53 if (OtherOpcode == Instr.Description->Opcode)
54 continue;
55 const auto &OtherInstrDesc = State.getInstrInfo().get(OtherOpcode);
56 if (auto E = isInfeasible(OtherInstrDesc)) {
57 llvm::consumeError(std::move(E));
58 continue;
59 }
60 const Instruction OtherInstr(OtherInstrDesc, RATC);
61 const AliasingConfigurations Forward(Instr, OtherInstr);
62 const AliasingConfigurations Back(OtherInstr, Instr);
63 if (Forward.empty() || Back.empty())
64 continue;
65 InstructionInstance ThisII(Instr);
66 InstructionInstance OtherII(OtherInstr);
67 if (!Forward.hasImplicitAliasing())
68 setRandomAliasing(Forward, ThisII, OtherII);
69 if (!Back.hasImplicitAliasing())
70 setRandomAliasing(Back, OtherII, ThisII);
71 SnippetPrototype Prototype;
72 Prototype.Explanation =
73 llvm::formatv("creating cycle through {0}.",
74 State.getInstrInfo().getName(OtherOpcode));
75 Prototype.Snippet.push_back(std::move(ThisII));
76 Prototype.Snippet.push_back(std::move(OtherII));
77 return std::move(Prototype);
78 }
79 return llvm::make_error<BenchmarkFailure>(
80 "Infeasible : Didn't find any scheme to make the instruction serial");
81 }
82
83 llvm::Expected<SnippetPrototype>
generatePrototype(unsigned Opcode) const84 LatencyBenchmarkRunner::generatePrototype(unsigned Opcode) const {
85 const auto &InstrDesc = State.getInstrInfo().get(Opcode);
86 if (auto E = isInfeasible(InstrDesc))
87 return std::move(E);
88 const Instruction Instr(InstrDesc, RATC);
89 if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr))
90 return SelfAliasingPrototype;
91 else
92 llvm::consumeError(SelfAliasingPrototype.takeError());
93 // No self aliasing, trying to create a dependency through another opcode.
94 return generateTwoInstructionPrototype(Instr);
95 }
96
getCounterName() const97 const char *LatencyBenchmarkRunner::getCounterName() const {
98 if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
99 llvm::report_fatal_error("sched model is missing extra processor info!");
100 const char *CounterName = State.getSubtargetInfo()
101 .getSchedModel()
102 .getExtraProcessorInfo()
103 .PfmCounters.CycleCounter;
104 if (!CounterName)
105 llvm::report_fatal_error("sched model does not define a cycle counter");
106 return CounterName;
107 }
108
109 std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction & Function,const unsigned NumRepetitions) const110 LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
111 const unsigned NumRepetitions) const {
112 // Cycle measurements include some overhead from the kernel. Repeat the
113 // measure several times and take the minimum value.
114 constexpr const int NumMeasurements = 30;
115 int64_t MinLatency = std::numeric_limits<int64_t>::max();
116 const char *CounterName = getCounterName();
117 if (!CounterName)
118 llvm::report_fatal_error("could not determine cycle counter name");
119 const pfm::PerfEvent CyclesPerfEvent(CounterName);
120 if (!CyclesPerfEvent.valid())
121 llvm::report_fatal_error("invalid perf event");
122 for (size_t I = 0; I < NumMeasurements; ++I) {
123 pfm::Counter Counter(CyclesPerfEvent);
124 Counter.start();
125 Function();
126 Counter.stop();
127 const int64_t Value = Counter.read();
128 if (Value < MinLatency)
129 MinLatency = Value;
130 }
131 return {{"latency", static_cast<double>(MinLatency) / NumRepetitions, ""}};
132 }
133
134 } // namespace exegesis
135