//===-- Latency.cpp ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "Latency.h" #include "Assembler.h" #include "BenchmarkRunner.h" #include "MCInstrDescView.h" #include "PerfHelper.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/FormatVariadic.h" namespace exegesis { static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) { return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN; } // FIXME: Handle memory, see PR36905. static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) { return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY; } LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; llvm::Error LatencyBenchmarkRunner::isInfeasible( const llvm::MCInstrDesc &MCInstrDesc) const { if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand)) return llvm::make_error( "Infeasible : has unknown operands"); if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand)) return llvm::make_error( "Infeasible : has memory operands"); return llvm::Error::success(); } llvm::Expected LatencyBenchmarkRunner::generateTwoInstructionPrototype( const Instruction &Instr) const { std::vector Opcodes; Opcodes.resize(State.getInstrInfo().getNumOpcodes()); std::iota(Opcodes.begin(), Opcodes.end(), 0U); std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator()); for (const unsigned OtherOpcode : Opcodes) { if (OtherOpcode == Instr.Description->Opcode) continue; const auto &OtherInstrDesc = State.getInstrInfo().get(OtherOpcode); if (auto E = isInfeasible(OtherInstrDesc)) { llvm::consumeError(std::move(E)); continue; } const Instruction OtherInstr(OtherInstrDesc, RATC); const AliasingConfigurations Forward(Instr, OtherInstr); const AliasingConfigurations Back(OtherInstr, Instr); if (Forward.empty() || Back.empty()) continue; InstructionInstance ThisII(Instr); InstructionInstance OtherII(OtherInstr); if (!Forward.hasImplicitAliasing()) setRandomAliasing(Forward, ThisII, OtherII); if (!Back.hasImplicitAliasing()) setRandomAliasing(Back, OtherII, ThisII); SnippetPrototype Prototype; Prototype.Explanation = llvm::formatv("creating cycle through {0}.", State.getInstrInfo().getName(OtherOpcode)); Prototype.Snippet.push_back(std::move(ThisII)); Prototype.Snippet.push_back(std::move(OtherII)); return std::move(Prototype); } return llvm::make_error( "Infeasible : Didn't find any scheme to make the instruction serial"); } llvm::Expected LatencyBenchmarkRunner::generatePrototype(unsigned Opcode) const { const auto &InstrDesc = State.getInstrInfo().get(Opcode); if (auto E = isInfeasible(InstrDesc)) return std::move(E); const Instruction Instr(InstrDesc, RATC); if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr)) return SelfAliasingPrototype; else llvm::consumeError(SelfAliasingPrototype.takeError()); // No self aliasing, trying to create a dependency through another opcode. return generateTwoInstructionPrototype(Instr); } const char *LatencyBenchmarkRunner::getCounterName() const { if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo()) llvm::report_fatal_error("sched model is missing extra processor info!"); const char *CounterName = State.getSubtargetInfo() .getSchedModel() .getExtraProcessorInfo() .PfmCounters.CycleCounter; if (!CounterName) llvm::report_fatal_error("sched model does not define a cycle counter"); return CounterName; } std::vector LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, const unsigned NumRepetitions) const { // Cycle measurements include some overhead from the kernel. Repeat the // measure several times and take the minimum value. constexpr const int NumMeasurements = 30; int64_t MinLatency = std::numeric_limits::max(); const char *CounterName = getCounterName(); if (!CounterName) llvm::report_fatal_error("could not determine cycle counter name"); const pfm::PerfEvent CyclesPerfEvent(CounterName); if (!CyclesPerfEvent.valid()) llvm::report_fatal_error("invalid perf event"); for (size_t I = 0; I < NumMeasurements; ++I) { pfm::Counter Counter(CyclesPerfEvent); Counter.start(); Function(); Counter.stop(); const int64_t Value = Counter.read(); if (Value < MinLatency) MinLatency = Value; } return {{"latency", static_cast(MinLatency) / NumRepetitions, ""}}; } } // namespace exegesis