1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H 10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H 11 #include "ErrorHandling.h" 12 #include "ProfiledBinary.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/Regex.h" 15 #include <fstream> 16 #include <list> 17 #include <map> 18 #include <vector> 19 20 using namespace llvm; 21 using namespace sampleprof; 22 23 namespace llvm { 24 namespace sampleprof { 25 26 // Stream based trace line iterator 27 class TraceStream { 28 std::string CurrentLine; 29 std::ifstream Fin; 30 bool IsAtEoF = false; 31 uint64_t LineNumber = 0; 32 33 public: TraceStream(StringRef Filename)34 TraceStream(StringRef Filename) : Fin(Filename.str()) { 35 if (!Fin.good()) 36 exitWithError("Error read input perf script file", Filename); 37 advance(); 38 } 39 getCurrentLine()40 StringRef getCurrentLine() { 41 assert(!IsAtEoF && "Line iterator reaches the End-of-File!"); 42 return CurrentLine; 43 } 44 getLineNumber()45 uint64_t getLineNumber() { return LineNumber; } 46 isAtEoF()47 bool isAtEoF() { return IsAtEoF; } 48 49 // Read the next line advance()50 void advance() { 51 if (!std::getline(Fin, CurrentLine)) { 52 IsAtEoF = true; 53 return; 54 } 55 LineNumber++; 56 } 57 }; 58 59 // The type of perfscript 60 enum PerfScriptType { 61 PERF_INVILID = 0, 62 PERF_LBR = 1, // Only LBR sample 63 PERF_LBR_STACK = 2, // Hybrid sample including call stack and LBR stack. 64 }; 65 66 // The parsed LBR sample entry. 67 struct LBREntry { 68 uint64_t Source = 0; 69 uint64_t Target = 0; 70 // An artificial branch stands for a series of consecutive branches starting 71 // from the current binary with a transition through external code and 72 // eventually landing back in the current binary. 73 bool IsArtificial = false; LBREntryLBREntry74 LBREntry(uint64_t S, uint64_t T, bool I) 75 : Source(S), Target(T), IsArtificial(I) {} 76 }; 77 78 // The parsed hybrid sample including call stack and LBR stack. 79 struct HybridSample { 80 // Profiled binary that current frame address belongs to 81 ProfiledBinary *Binary; 82 // Call stack recorded in FILO(leaf to root) order 83 std::list<uint64_t> CallStack; 84 // LBR stack recorded in FIFO order 85 SmallVector<LBREntry, 16> LBRStack; 86 87 // Used for sample aggregation 88 bool operator==(const HybridSample &Other) const { 89 if (Other.Binary != Binary) 90 return false; 91 const std::list<uint64_t> &OtherCallStack = Other.CallStack; 92 const SmallVector<LBREntry, 16> &OtherLBRStack = Other.LBRStack; 93 94 if (CallStack.size() != OtherCallStack.size() || 95 LBRStack.size() != OtherLBRStack.size()) 96 return false; 97 98 auto Iter = CallStack.begin(); 99 for (auto Address : OtherCallStack) { 100 if (Address != *Iter++) 101 return false; 102 } 103 104 for (size_t I = 0; I < OtherLBRStack.size(); I++) { 105 if (LBRStack[I].Source != OtherLBRStack[I].Source || 106 LBRStack[I].Target != OtherLBRStack[I].Target) 107 return false; 108 } 109 return true; 110 } 111 }; 112 113 // The state for the unwinder, it doesn't hold the data but only keep the 114 // pointer/index of the data, While unwinding, the CallStack is changed 115 // dynamicially and will be recorded as the context of the sample 116 struct UnwindState { 117 // Profiled binary that current frame address belongs to 118 const ProfiledBinary *Binary; 119 // TODO: switch to use trie for call stack 120 std::list<uint64_t> CallStack; 121 // Used to fall through the LBR stack 122 uint32_t LBRIndex = 0; 123 // Reference to HybridSample.LBRStack 124 const SmallVector<LBREntry, 16> &LBRStack; 125 // Used to iterate the address range 126 InstructionPointer InstPtr; UnwindStateUnwindState127 UnwindState(const HybridSample &Sample) 128 : Binary(Sample.Binary), CallStack(Sample.CallStack), 129 LBRStack(Sample.LBRStack), 130 InstPtr(Sample.Binary, Sample.CallStack.front()) {} 131 validateInitialStateUnwindState132 bool validateInitialState() { 133 uint64_t LBRLeaf = LBRStack[LBRIndex].Target; 134 uint64_t StackLeaf = CallStack.front(); 135 // When we take a stack sample, ideally the sampling distance between the 136 // leaf IP of stack and the last LBR target shouldn't be very large. 137 // Use a heuristic size (0x100) to filter out broken records. 138 if (StackLeaf < LBRLeaf || StackLeaf >= LBRLeaf + 0x100) { 139 WithColor::warning() << "Bogus trace: stack tip = " 140 << format("%#010x", StackLeaf) 141 << ", LBR tip = " << format("%#010x\n", LBRLeaf); 142 return false; 143 } 144 return true; 145 } 146 checkStateConsistencyUnwindState147 void checkStateConsistency() { 148 assert(InstPtr.Address == CallStack.front() && 149 "IP should align with context leaf"); 150 } 151 getExpandedContextStrUnwindState152 std::string getExpandedContextStr() const { 153 return Binary->getExpandedContextStr(CallStack); 154 } getBinaryUnwindState155 const ProfiledBinary *getBinary() const { return Binary; } hasNextLBRUnwindState156 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); } getCurrentLBRSourceUnwindState157 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; } getCurrentLBRTargetUnwindState158 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; } getCurrentLBRUnwindState159 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; } advanceLBRUnwindState160 void advanceLBR() { LBRIndex++; } 161 }; 162 163 // The counter of branch samples for one function indexed by the branch, 164 // which is represented as the source and target offset pair. 165 using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; 166 // The counter of range samples for one function indexed by the range, 167 // which is represented as the start and end offset pair. 168 using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; 169 // Range sample counters indexed by the context string 170 using ContextRangeCounter = std::unordered_map<std::string, RangeSample>; 171 // Branch sample counters indexed by the context string 172 using ContextBranchCounter = std::unordered_map<std::string, BranchSample>; 173 174 // For Hybrid sample counters 175 struct ContextSampleCounters { 176 ContextRangeCounter RangeCounter; 177 ContextBranchCounter BranchCounter; 178 recordRangeCountContextSampleCounters179 void recordRangeCount(std::string &ContextId, uint64_t Start, uint64_t End, 180 uint64_t Repeat) { 181 RangeCounter[ContextId][{Start, End}] += Repeat; 182 } recordBranchCountContextSampleCounters183 void recordBranchCount(std::string &ContextId, uint64_t Source, 184 uint64_t Target, uint64_t Repeat) { 185 BranchCounter[ContextId][{Source, Target}] += Repeat; 186 } 187 }; 188 189 struct HybridSampleHash { hashCombineHybridSampleHash190 uint64_t hashCombine(uint64_t Hash, uint64_t Value) const { 191 // Simple DJB2 hash 192 return ((Hash << 5) + Hash) + Value; 193 } 194 operatorHybridSampleHash195 uint64_t operator()(const HybridSample &Sample) const { 196 uint64_t Hash = 5381; 197 Hash = hashCombine(Hash, reinterpret_cast<uint64_t>(Sample.Binary)); 198 for (const auto &Value : Sample.CallStack) { 199 Hash = hashCombine(Hash, Value); 200 } 201 for (const auto &Entry : Sample.LBRStack) { 202 Hash = hashCombine(Hash, Entry.Source); 203 Hash = hashCombine(Hash, Entry.Target); 204 } 205 return Hash; 206 } 207 }; 208 209 // After parsing the sample, we record the samples by aggregating them 210 // into this structure and the value is the sample counter. 211 using AggregationCounter = 212 std::unordered_map<HybridSample, uint64_t, HybridSampleHash>; 213 214 /* 215 As in hybrid sample we have a group of LBRs and the most recent sampling call 216 stack, we can walk through those LBRs to infer more call stacks which would be 217 used as context for profile. VirtualUnwinder is the class to do the call stack 218 unwinding based on LBR state. Two types of unwinding are processd here: 219 1) LBR unwinding and 2) linear range unwinding. 220 Specifically, for each LBR entry(can be classified into call, return, regular 221 branch), LBR unwinding will replay the operation by pushing, popping or 222 switching leaf frame towards the call stack and since the initial call stack 223 is most recently sampled, the replay should be in anti-execution order, i.e. for 224 the regular case, pop the call stack when LBR is call, push frame on call stack 225 when LBR is return. After each LBR processed, it also needs to align with the 226 next LBR by going through instructions from previous LBR's target to current 227 LBR's source, which is the linear unwinding. As instruction from linear range 228 can come from different function by inlining, linear unwinding will do the range 229 splitting and record counters by the range with same inline context. Over those 230 unwinding process we will record each call stack as context id and LBR/linear 231 range as sample counter for further CS profile generation. 232 */ 233 class VirtualUnwinder { 234 public: VirtualUnwinder(ContextSampleCounters * Counters)235 VirtualUnwinder(ContextSampleCounters *Counters) : SampleCounters(Counters) {} 236 isCallState(UnwindState & State)237 bool isCallState(UnwindState &State) const { 238 // The tail call frame is always missing here in stack sample, we will 239 // use a specific tail call tracker to infer it. 240 return State.getBinary()->addressIsCall(State.getCurrentLBRSource()); 241 } 242 isReturnState(UnwindState & State)243 bool isReturnState(UnwindState &State) const { 244 // Simply check addressIsReturn, as ret is always reliable, both for 245 // regular call and tail call. 246 return State.getBinary()->addressIsReturn(State.getCurrentLBRSource()); 247 } 248 249 void unwindCall(UnwindState &State); 250 void unwindLinear(UnwindState &State, uint64_t Repeat); 251 void unwindReturn(UnwindState &State); 252 void unwindBranchWithinFrame(UnwindState &State); 253 bool unwind(const HybridSample &Sample, uint64_t Repeat); 254 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State, 255 uint64_t Repeat); 256 void recordBranchCount(const LBREntry &Branch, UnwindState &State, 257 uint64_t Repeat); 258 259 private: 260 ContextSampleCounters *SampleCounters; 261 }; 262 263 // Filename to binary map 264 using BinaryMap = StringMap<ProfiledBinary>; 265 // Address to binary map for fast look-up 266 using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>; 267 // Binary to ContextSampleCounters Map to support multiple binary, we may have 268 // same binary loaded at different addresses, they should share the same sample 269 // counter 270 using BinarySampleCounterMap = 271 std::unordered_map<ProfiledBinary *, ContextSampleCounters>; 272 273 // Load binaries and read perf trace to parse the events and samples 274 class PerfReader { 275 276 public: 277 PerfReader(cl::list<std::string> &BinaryFilenames); 278 279 // Hybrid sample(call stack + LBRs) profile traces are seprated by double line 280 // break, search for that within the first 4k charactors to avoid going 281 // through the whole file. isHybridPerfScript(StringRef FileName)282 static bool isHybridPerfScript(StringRef FileName) { 283 auto BufOrError = MemoryBuffer::getFileOrSTDIN(FileName, 4000); 284 if (!BufOrError) 285 exitWithError(BufOrError.getError(), FileName); 286 auto Buffer = std::move(BufOrError.get()); 287 if (Buffer->getBuffer().find("\n\n") == StringRef::npos) 288 return false; 289 return true; 290 } 291 292 // The parsed MMap event 293 struct MMapEvent { 294 uint64_t PID = 0; 295 uint64_t BaseAddress = 0; 296 uint64_t Size = 0; 297 uint64_t Offset = 0; 298 StringRef BinaryPath; 299 }; 300 301 /// Load symbols and disassemble the code of a give binary. 302 /// Also register the binary in the binary table. 303 /// 304 ProfiledBinary &loadBinary(const StringRef BinaryPath, 305 bool AllowNameConflict = true); 306 void updateBinaryAddress(const MMapEvent &Event); getPerfScriptType()307 PerfScriptType getPerfScriptType() const { return PerfType; } 308 // Entry of the reader to parse multiple perf traces 309 void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames); getBinarySampleCounters()310 const BinarySampleCounterMap &getBinarySampleCounters() const { 311 return BinarySampleCounters; 312 } 313 314 private: 315 /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a 316 /// mapping between the binary name and its memory layout. 317 /// 318 void parseMMap2Event(TraceStream &TraceIt); 319 // Parse perf events/samples and do aggregation 320 void parseAndAggregateTrace(StringRef Filename); 321 // Parse either an MMAP event or a perf sample 322 void parseEventOrSample(TraceStream &TraceIt); 323 // Parse the hybrid sample including the call and LBR line 324 void parseHybridSample(TraceStream &TraceIt); 325 // Extract call stack from the perf trace lines 326 bool extractCallstack(TraceStream &TraceIt, std::list<uint64_t> &CallStack); 327 // Extract LBR stack from one perf trace line 328 bool extractLBRStack(TraceStream &TraceIt, 329 SmallVector<LBREntry, 16> &LBRStack, 330 ProfiledBinary *Binary); 331 void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames); 332 // Post process the profile after trace aggregation, we will do simple range 333 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample). 334 void generateRawProfile(); 335 // Unwind the hybrid samples after aggregration 336 void unwindSamples(); 337 void printUnwinderOutput(); 338 // Helper function for looking up binary in AddressBinaryMap 339 ProfiledBinary *getBinary(uint64_t Address); 340 341 BinaryMap BinaryTable; 342 AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup. 343 344 private: 345 BinarySampleCounterMap BinarySampleCounters; 346 // Samples with the repeating time generated by the perf reader 347 AggregationCounter AggregatedSamples; 348 PerfScriptType PerfType; 349 }; 350 351 } // end namespace sampleprof 352 } // end namespace llvm 353 354 #endif 355