• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- Trace.cpp - XRay Trace Loading implementation. ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // XRay log reader implementation.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/XRay/Trace.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Support/DataExtractor.h"
16 #include "llvm/Support/Error.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/XRay/YAMLXRayRecord.h"
19 
20 using namespace llvm;
21 using namespace llvm::xray;
22 using llvm::yaml::Input;
23 
24 namespace {
25 using XRayRecordStorage =
26     std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
27 
28 // Populates the FileHeader reference by reading the first 32 bytes of the file.
readBinaryFormatHeader(StringRef Data,XRayFileHeader & FileHeader)29 Error readBinaryFormatHeader(StringRef Data, XRayFileHeader &FileHeader) {
30   // FIXME: Maybe deduce whether the data is little or big-endian using some
31   // magic bytes in the beginning of the file?
32 
33   // First 32 bytes of the file will always be the header. We assume a certain
34   // format here:
35   //
36   //   (2)   uint16 : version
37   //   (2)   uint16 : type
38   //   (4)   uint32 : bitfield
39   //   (8)   uint64 : cycle frequency
40   //   (16)  -      : padding
41 
42   DataExtractor HeaderExtractor(Data, true, 8);
43   uint32_t OffsetPtr = 0;
44   FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr);
45   FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
46   uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr);
47   FileHeader.ConstantTSC = Bitfield & 1uL;
48   FileHeader.NonstopTSC = Bitfield & 1uL << 1;
49   FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr);
50   std::memcpy(&FileHeader.FreeFormData, Data.bytes_begin() + OffsetPtr, 16);
51   if (FileHeader.Version != 1 && FileHeader.Version != 2 &&
52       FileHeader.Version != 3)
53     return make_error<StringError>(
54         Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
55         std::make_error_code(std::errc::invalid_argument));
56   return Error::success();
57 }
58 
loadNaiveFormatLog(StringRef Data,XRayFileHeader & FileHeader,std::vector<XRayRecord> & Records)59 Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
60                          std::vector<XRayRecord> &Records) {
61   if (Data.size() < 32)
62     return make_error<StringError>(
63         "Not enough bytes for an XRay log.",
64         std::make_error_code(std::errc::invalid_argument));
65 
66   if (Data.size() - 32 == 0 || Data.size() % 32 != 0)
67     return make_error<StringError>(
68         "Invalid-sized XRay data.",
69         std::make_error_code(std::errc::invalid_argument));
70 
71   if (auto E = readBinaryFormatHeader(Data, FileHeader))
72     return E;
73 
74   // Each record after the header will be 32 bytes, in the following format:
75   //
76   //   (2)   uint16 : record type
77   //   (1)   uint8  : cpu id
78   //   (1)   uint8  : type
79   //   (4)   sint32 : function id
80   //   (8)   uint64 : tsc
81   //   (4)   uint32 : thread id
82   //   (4)   uint32 : process id
83   //   (8)   -      : padding
84   for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(32)) {
85     DataExtractor RecordExtractor(S, true, 8);
86     uint32_t OffsetPtr = 0;
87     switch (auto RecordType = RecordExtractor.getU16(&OffsetPtr)) {
88     case 0: { // Normal records.
89       Records.emplace_back();
90       auto &Record = Records.back();
91       Record.RecordType = RecordType;
92       Record.CPU = RecordExtractor.getU8(&OffsetPtr);
93       auto Type = RecordExtractor.getU8(&OffsetPtr);
94       switch (Type) {
95       case 0:
96         Record.Type = RecordTypes::ENTER;
97         break;
98       case 1:
99         Record.Type = RecordTypes::EXIT;
100         break;
101       case 2:
102         Record.Type = RecordTypes::TAIL_EXIT;
103         break;
104       case 3:
105         Record.Type = RecordTypes::ENTER_ARG;
106         break;
107       default:
108         return make_error<StringError>(
109             Twine("Unknown record type '") + Twine(int{Type}) + "'",
110             std::make_error_code(std::errc::executable_format_error));
111       }
112       Record.FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t));
113       Record.TSC = RecordExtractor.getU64(&OffsetPtr);
114       Record.TId = RecordExtractor.getU32(&OffsetPtr);
115       Record.PId = RecordExtractor.getU32(&OffsetPtr);
116       break;
117     }
118     case 1: { // Arg payload record.
119       auto &Record = Records.back();
120       // Advance two bytes to avoid padding.
121       OffsetPtr += 2;
122       int32_t FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t));
123       auto TId = RecordExtractor.getU32(&OffsetPtr);
124       auto PId = RecordExtractor.getU32(&OffsetPtr);
125 
126       // Make a check for versions above 3 for the Pid field
127       if (Record.FuncId != FuncId || Record.TId != TId ||
128           (FileHeader.Version >= 3 ? Record.PId != PId : false))
129         return make_error<StringError>(
130             Twine("Corrupted log, found arg payload following non-matching "
131                   "function + thread record. Record for function ") +
132                 Twine(Record.FuncId) + " != " + Twine(FuncId) + "; offset: " +
133                 Twine(S.data() - Data.data()),
134             std::make_error_code(std::errc::executable_format_error));
135 
136       auto Arg = RecordExtractor.getU64(&OffsetPtr);
137       Record.CallArgs.push_back(Arg);
138       break;
139     }
140     default:
141       return make_error<StringError>(
142           Twine("Unknown record type == ") + Twine(RecordType),
143           std::make_error_code(std::errc::executable_format_error));
144     }
145   }
146   return Error::success();
147 }
148 
/// Reader state carried across records while parsing a Flight Data Recorder
/// mode log. Metadata records are sparse compared to the packed function
/// records, so the most recent CPU id, thread id, process id and TSC base are
/// remembered here and copied (or, for the TSC, accumulated) onto every
/// function record that is read.
struct FDRState {
  /// CPU id taken from the most recent NewCPUId record.
  uint16_t CPUId;
  /// Thread id taken from the most recent NewBuffer record.
  uint16_t ThreadId;
  /// Process id taken from the most recent Pid record.
  int32_t ProcessId;
  /// Absolute TSC that the next function record's delta is added to.
  uint64_t BaseTSC;

  /// The kind of record the reader expects to see next. Several of the state
  /// transitions of the FDR log reader are enforced as explicit checks against
  /// this value.
  enum class Token {
    NEW_BUFFER_RECORD_OR_EOF,
    WALLCLOCK_RECORD,
    NEW_CPU_ID_RECORD,
    FUNCTION_SEQUENCE,
    SCAN_TO_END_OF_THREAD_BUF,
    CUSTOM_EVENT_DATA,
    CALL_ARGUMENT,
    BUFFER_EXTENTS,
    PID_RECORD,
  };
  Token Expects;

  /// Size of the thread buffer currently being read, and how many of its bytes
  /// have been consumed so far. A thread's buffer may end in trailing garbage
  /// that has to be skipped, which is why progress is tracked.
  uint64_t CurrentBufferSize;
  uint64_t CurrentBufferConsumed;
};
180 
fdrStateToTwine(const FDRState::Token & state)181 const char *fdrStateToTwine(const FDRState::Token &state) {
182   switch (state) {
183   case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
184     return "NEW_BUFFER_RECORD_OR_EOF";
185   case FDRState::Token::WALLCLOCK_RECORD:
186     return "WALLCLOCK_RECORD";
187   case FDRState::Token::NEW_CPU_ID_RECORD:
188     return "NEW_CPU_ID_RECORD";
189   case FDRState::Token::FUNCTION_SEQUENCE:
190     return "FUNCTION_SEQUENCE";
191   case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF:
192     return "SCAN_TO_END_OF_THREAD_BUF";
193   case FDRState::Token::CUSTOM_EVENT_DATA:
194     return "CUSTOM_EVENT_DATA";
195   case FDRState::Token::CALL_ARGUMENT:
196     return "CALL_ARGUMENT";
197   case FDRState::Token::BUFFER_EXTENTS:
198     return "BUFFER_EXTENTS";
199   case FDRState::Token::PID_RECORD:
200     return "PID_RECORD";
201   }
202   return "UNKNOWN";
203 }
204 
205 /// State transition when a NewBufferRecord is encountered.
processFDRNewBufferRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor)206 Error processFDRNewBufferRecord(FDRState &State, uint8_t RecordFirstByte,
207                                 DataExtractor &RecordExtractor) {
208 
209   if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
210     return make_error<StringError>(
211         Twine("Malformed log. Read New Buffer record kind out of sequence; "
212               "expected: ") +
213             fdrStateToTwine(State.Expects),
214         std::make_error_code(std::errc::executable_format_error));
215   uint32_t OffsetPtr = 1; // 1 byte into record.
216   State.ThreadId = RecordExtractor.getU16(&OffsetPtr);
217   State.Expects = FDRState::Token::WALLCLOCK_RECORD;
218   return Error::success();
219 }
220 
221 /// State transition when an EndOfBufferRecord is encountered.
processFDREndOfBufferRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor)222 Error processFDREndOfBufferRecord(FDRState &State, uint8_t RecordFirstByte,
223                                   DataExtractor &RecordExtractor) {
224   if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
225     return make_error<StringError>(
226         Twine("Malformed log. Received EOB message without current buffer; "
227               "expected: ") +
228             fdrStateToTwine(State.Expects),
229         std::make_error_code(std::errc::executable_format_error));
230   State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF;
231   return Error::success();
232 }
233 
234 /// State transition when a NewCPUIdRecord is encountered.
processFDRNewCPUIdRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor)235 Error processFDRNewCPUIdRecord(FDRState &State, uint8_t RecordFirstByte,
236                                DataExtractor &RecordExtractor) {
237   if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE &&
238       State.Expects != FDRState::Token::NEW_CPU_ID_RECORD)
239     return make_error<StringError>(
240         Twine("Malformed log. Read NewCPUId record kind out of sequence; "
241               "expected: ") +
242             fdrStateToTwine(State.Expects),
243         std::make_error_code(std::errc::executable_format_error));
244   uint32_t OffsetPtr = 1; // Read starting after the first byte.
245   State.CPUId = RecordExtractor.getU16(&OffsetPtr);
246   State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
247   State.Expects = FDRState::Token::FUNCTION_SEQUENCE;
248   return Error::success();
249 }
250 
251 /// State transition when a TSCWrapRecord (overflow detection) is encountered.
processFDRTSCWrapRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor)252 Error processFDRTSCWrapRecord(FDRState &State, uint8_t RecordFirstByte,
253                               DataExtractor &RecordExtractor) {
254   if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE)
255     return make_error<StringError>(
256         Twine("Malformed log. Read TSCWrap record kind out of sequence; "
257               "expecting: ") +
258             fdrStateToTwine(State.Expects),
259         std::make_error_code(std::errc::executable_format_error));
260   uint32_t OffsetPtr = 1; // Read starting after the first byte.
261   State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
262   return Error::success();
263 }
264 
265 /// State transition when a WallTimeMarkerRecord is encountered.
processFDRWallTimeRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor)266 Error processFDRWallTimeRecord(FDRState &State, uint8_t RecordFirstByte,
267                                DataExtractor &RecordExtractor) {
268   if (State.Expects != FDRState::Token::WALLCLOCK_RECORD)
269     return make_error<StringError>(
270         Twine("Malformed log. Read Wallclock record kind out of sequence; "
271               "expecting: ") +
272             fdrStateToTwine(State.Expects),
273         std::make_error_code(std::errc::executable_format_error));
274 
275   // TODO: Someday, reconcile the TSC ticks to wall clock time for presentation
276   // purposes. For now, we're ignoring these records.
277   State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
278   return Error::success();
279 }
280 
281 /// State transition when a PidRecord is encountered.
processFDRPidRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor)282 Error processFDRPidRecord(FDRState &State, uint8_t RecordFirstByte,
283                           DataExtractor &RecordExtractor) {
284 
285   if (State.Expects != FDRState::Token::PID_RECORD)
286     return make_error<StringError>(
287         Twine("Malformed log. Read Pid record kind out of sequence; "
288               "expected: ") +
289             fdrStateToTwine(State.Expects),
290         std::make_error_code(std::errc::executable_format_error));
291 
292   uint32_t OffsetPtr = 1; // Read starting after the first byte.
293   State.ProcessId = RecordExtractor.getU32(&OffsetPtr);
294   State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
295   return Error::success();
296 }
297 
298 /// State transition when a CustomEventMarker is encountered.
processCustomEventMarker(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor,size_t & RecordSize)299 Error processCustomEventMarker(FDRState &State, uint8_t RecordFirstByte,
300                                DataExtractor &RecordExtractor,
301                                size_t &RecordSize) {
302   // We can encounter a CustomEventMarker anywhere in the log, so we can handle
303   // it regardless of the expectation. However, we do set the expectation to
304   // read a set number of fixed bytes, as described in the metadata.
305   uint32_t OffsetPtr = 1; // Read after the first byte.
306   uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr);
307   uint64_t TSC = RecordExtractor.getU64(&OffsetPtr);
308 
309   // FIXME: Actually represent the record through the API. For now we only
310   // skip through the data.
311   (void)TSC;
312   RecordSize = 16 + DataSize;
313   return Error::success();
314 }
315 
316 /// State transition when an BufferExtents record is encountered.
processBufferExtents(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor)317 Error processBufferExtents(FDRState &State, uint8_t RecordFirstByte,
318                            DataExtractor &RecordExtractor) {
319   if (State.Expects != FDRState::Token::BUFFER_EXTENTS)
320     return make_error<StringError>(
321         Twine("Malformed log. Buffer Extents unexpected; expected: ") +
322             fdrStateToTwine(State.Expects),
323         std::make_error_code(std::errc::executable_format_error));
324   uint32_t OffsetPtr = 1; // Read after the first byte.
325   State.CurrentBufferSize = RecordExtractor.getU64(&OffsetPtr);
326   State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
327   return Error::success();
328 }
329 
330 /// State transition when a CallArgumentRecord is encountered.
processFDRCallArgumentRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor,std::vector<XRayRecord> & Records)331 Error processFDRCallArgumentRecord(FDRState &State, uint8_t RecordFirstByte,
332                                    DataExtractor &RecordExtractor,
333                                    std::vector<XRayRecord> &Records) {
334   uint32_t OffsetPtr = 1; // Read starting after the first byte.
335   auto &Enter = Records.back();
336 
337   if (Enter.Type != RecordTypes::ENTER)
338     return make_error<StringError>(
339         "CallArgument needs to be right after a function entry",
340         std::make_error_code(std::errc::executable_format_error));
341   Enter.Type = RecordTypes::ENTER_ARG;
342   Enter.CallArgs.emplace_back(RecordExtractor.getU64(&OffsetPtr));
343   return Error::success();
344 }
345 
346 /// Advances the state machine for reading the FDR record type by reading one
347 /// Metadata Record and updating the State appropriately based on the kind of
348 /// record encountered. The RecordKind is encoded in the first byte of the
349 /// Record, which the caller should pass in because they have already read it
350 /// to determine that this is a metadata record as opposed to a function record.
351 ///
352 /// Beginning with Version 2 of the FDR log, we do not depend on the size of the
353 /// buffer, but rather use the extents to determine how far to read in the log
354 /// for this particular buffer.
355 ///
356 /// In Version 3, FDR log now includes a pid metadata record after
357 /// WallTimeMarker
processFDRMetadataRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor,size_t & RecordSize,std::vector<XRayRecord> & Records,uint16_t Version)358 Error processFDRMetadataRecord(FDRState &State, uint8_t RecordFirstByte,
359                                DataExtractor &RecordExtractor,
360                                size_t &RecordSize,
361                                std::vector<XRayRecord> &Records,
362                                uint16_t Version) {
363   // The remaining 7 bits are the RecordKind enum.
364   uint8_t RecordKind = RecordFirstByte >> 1;
365   switch (RecordKind) {
366   case 0: // NewBuffer
367     if (auto E =
368             processFDRNewBufferRecord(State, RecordFirstByte, RecordExtractor))
369       return E;
370     break;
371   case 1: // EndOfBuffer
372     if (Version >= 2)
373       return make_error<StringError>(
374           "Since Version 2 of FDR logging, we no longer support EOB records.",
375           std::make_error_code(std::errc::executable_format_error));
376     if (auto E = processFDREndOfBufferRecord(State, RecordFirstByte,
377                                              RecordExtractor))
378       return E;
379     break;
380   case 2: // NewCPUId
381     if (auto E =
382             processFDRNewCPUIdRecord(State, RecordFirstByte, RecordExtractor))
383       return E;
384     break;
385   case 3: // TSCWrap
386     if (auto E =
387             processFDRTSCWrapRecord(State, RecordFirstByte, RecordExtractor))
388       return E;
389     break;
390   case 4: // WallTimeMarker
391     if (auto E =
392             processFDRWallTimeRecord(State, RecordFirstByte, RecordExtractor))
393       return E;
394     // In Version 3 and and above, a PidRecord is expected after WallTimeRecord
395     if (Version >= 3)
396       State.Expects = FDRState::Token::PID_RECORD;
397     break;
398   case 5: // CustomEventMarker
399     if (auto E = processCustomEventMarker(State, RecordFirstByte,
400                                           RecordExtractor, RecordSize))
401       return E;
402     break;
403   case 6: // CallArgument
404     if (auto E = processFDRCallArgumentRecord(State, RecordFirstByte,
405                                               RecordExtractor, Records))
406       return E;
407     break;
408   case 7: // BufferExtents
409     if (auto E = processBufferExtents(State, RecordFirstByte, RecordExtractor))
410       return E;
411     break;
412   case 9: // Pid
413     if (auto E = processFDRPidRecord(State, RecordFirstByte, RecordExtractor))
414       return E;
415     break;
416   default:
417     // Widen the record type to uint16_t to prevent conversion to char.
418     return make_error<StringError>(
419         Twine("Illegal metadata record type: ")
420             .concat(Twine(static_cast<unsigned>(RecordKind))),
421         std::make_error_code(std::errc::executable_format_error));
422   }
423   return Error::success();
424 }
425 
426 /// Reads a function record from an FDR format log, appending a new XRayRecord
427 /// to the vector being populated and updating the State with a new value
428 /// reference value to interpret TSC deltas.
429 ///
430 /// The XRayRecord constructed includes information from the function record
431 /// processed here as well as Thread ID and CPU ID formerly extracted into
432 /// State.
processFDRFunctionRecord(FDRState & State,uint8_t RecordFirstByte,DataExtractor & RecordExtractor,std::vector<XRayRecord> & Records)433 Error processFDRFunctionRecord(FDRState &State, uint8_t RecordFirstByte,
434                                DataExtractor &RecordExtractor,
435                                std::vector<XRayRecord> &Records) {
436   switch (State.Expects) {
437   case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
438     return make_error<StringError>(
439         "Malformed log. Received Function Record before new buffer setup.",
440         std::make_error_code(std::errc::executable_format_error));
441   case FDRState::Token::WALLCLOCK_RECORD:
442     return make_error<StringError>(
443         "Malformed log. Received Function Record when expecting wallclock.",
444         std::make_error_code(std::errc::executable_format_error));
445   case FDRState::Token::PID_RECORD:
446     return make_error<StringError>(
447         "Malformed log. Received Function Record when expecting pid.",
448         std::make_error_code(std::errc::executable_format_error));
449   case FDRState::Token::NEW_CPU_ID_RECORD:
450     return make_error<StringError>(
451         "Malformed log. Received Function Record before first CPU record.",
452         std::make_error_code(std::errc::executable_format_error));
453   default:
454     Records.emplace_back();
455     auto &Record = Records.back();
456     Record.RecordType = 0; // Record is type NORMAL.
457     // Strip off record type bit and use the next three bits.
458     uint8_t RecordType = (RecordFirstByte >> 1) & 0x07;
459     switch (RecordType) {
460     case static_cast<uint8_t>(RecordTypes::ENTER):
461       Record.Type = RecordTypes::ENTER;
462       break;
463     case static_cast<uint8_t>(RecordTypes::EXIT):
464       Record.Type = RecordTypes::EXIT;
465       break;
466     case static_cast<uint8_t>(RecordTypes::TAIL_EXIT):
467       Record.Type = RecordTypes::TAIL_EXIT;
468       break;
469     default:
470       // Cast to an unsigned integer to not interpret the record type as a char.
471       return make_error<StringError>(
472           Twine("Illegal function record type: ")
473               .concat(Twine(static_cast<unsigned>(RecordType))),
474           std::make_error_code(std::errc::executable_format_error));
475     }
476     Record.CPU = State.CPUId;
477     Record.TId = State.ThreadId;
478     Record.PId = State.ProcessId;
479     // Back up to read first 32 bits, including the 4 we pulled RecordType
480     // and RecordKind out of. The remaining 28 are FunctionId.
481     uint32_t OffsetPtr = 0;
482     // Despite function Id being a signed int on XRayRecord,
483     // when it is written to an FDR format, the top bits are truncated,
484     // so it is effectively an unsigned value. When we shift off the
485     // top four bits, we want the shift to be logical, so we read as
486     // uint32_t.
487     uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr);
488     Record.FuncId = FuncIdBitField >> 4;
489     // FunctionRecords have a 32 bit delta from the previous absolute TSC
490     // or TSC delta. If this would overflow, we should read a TSCWrap record
491     // with an absolute TSC reading.
492     uint64_t NewTSC = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr);
493     State.BaseTSC = NewTSC;
494     Record.TSC = NewTSC;
495   }
496   return Error::success();
497 }
498 
499 /// Reads a log in FDR mode for version 1 of this binary format. FDR mode is
500 /// defined as part of the compiler-rt project in xray_fdr_logging.h, and such
501 /// a log consists of the familiar 32 bit XRayHeader, followed by sequences of
502 /// of interspersed 16 byte Metadata Records and 8 byte Function Records.
503 ///
504 /// The following is an attempt to document the grammar of the format, which is
505 /// parsed by this function for little-endian machines. Since the format makes
506 /// use of BitFields, when we support big-endian architectures, we will need to
507 /// adjust not only the endianness parameter to llvm's RecordExtractor, but also
508 /// the bit twiddling logic, which is consistent with the little-endian
509 /// convention that BitFields within a struct will first be packed into the
510 /// least significant bits the address they belong to.
511 ///
512 /// We expect a format complying with the grammar in the following pseudo-EBNF
513 /// in Version 1 of the FDR log.
514 ///
515 /// FDRLog: XRayFileHeader ThreadBuffer*
516 /// XRayFileHeader: 32 bytes to identify the log as FDR with machine metadata.
517 ///     Includes BufferSize
518 /// ThreadBuffer: NewBuffer WallClockTime NewCPUId FunctionSequence EOB
519 /// BufSize: 8 byte unsigned integer indicating how large the buffer is.
520 /// NewBuffer: 16 byte metadata record with Thread Id.
521 /// WallClockTime: 16 byte metadata record with human readable time.
522 /// Pid: 16 byte metadata record with Pid
523 /// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading.
524 /// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize.
525 /// FunctionSequence: NewCPUId | TSCWrap | FunctionRecord
526 /// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading.
527 /// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta.
528 ///
529 /// In Version 2, we make the following changes:
530 ///
531 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime NewCPUId
532 ///               FunctionSequence
533 /// BufferExtents: 16 byte metdata record describing how many usable bytes are
534 ///                in the buffer. This is measured from the start of the buffer
535 ///                and must always be at least 48 (bytes).
536 ///
537 /// In Version 3, we make the following changes:
538 ///
539 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId
540 ///               FunctionSequence
541 /// EOB: *deprecated*
loadFDRLog(StringRef Data,XRayFileHeader & FileHeader,std::vector<XRayRecord> & Records)542 Error loadFDRLog(StringRef Data, XRayFileHeader &FileHeader,
543                  std::vector<XRayRecord> &Records) {
544   if (Data.size() < 32)
545     return make_error<StringError>(
546         "Not enough bytes for an XRay log.",
547         std::make_error_code(std::errc::invalid_argument));
548 
549   // For an FDR log, there are records sized 16 and 8 bytes.
550   // There actually may be no records if no non-trivial functions are
551   // instrumented.
552   if (Data.size() % 8 != 0)
553     return make_error<StringError>(
554         "Invalid-sized XRay data.",
555         std::make_error_code(std::errc::invalid_argument));
556 
557   if (auto E = readBinaryFormatHeader(Data, FileHeader))
558     return E;
559 
560   uint64_t BufferSize = 0;
561   {
562     StringRef ExtraDataRef(FileHeader.FreeFormData, 16);
563     DataExtractor ExtraDataExtractor(ExtraDataRef, true, 8);
564     uint32_t ExtraDataOffset = 0;
565     BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset);
566   }
567 
568   FDRState::Token InitialExpectation;
569   switch (FileHeader.Version) {
570   case 1:
571     InitialExpectation = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
572     break;
573   case 2:
574   case 3:
575     InitialExpectation = FDRState::Token::BUFFER_EXTENTS;
576     break;
577   default:
578     return make_error<StringError>(
579         Twine("Unsupported version '") + Twine(FileHeader.Version) + "'",
580         std::make_error_code(std::errc::executable_format_error));
581   }
582   FDRState State{0, 0, 0, 0, InitialExpectation, BufferSize, 0};
583 
584   // RecordSize will tell the loop how far to seek ahead based on the record
585   // type that we have just read.
586   size_t RecordSize = 0;
587   for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(RecordSize)) {
588     DataExtractor RecordExtractor(S, true, 8);
589     uint32_t OffsetPtr = 0;
590     if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) {
591       RecordSize = State.CurrentBufferSize - State.CurrentBufferConsumed;
592       if (S.size() < RecordSize) {
593         return make_error<StringError>(
594             Twine("Incomplete thread buffer. Expected at least ") +
595                 Twine(RecordSize) + " bytes but found " + Twine(S.size()),
596             make_error_code(std::errc::invalid_argument));
597       }
598       State.CurrentBufferConsumed = 0;
599       State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
600       continue;
601     }
602     uint8_t BitField = RecordExtractor.getU8(&OffsetPtr);
603     bool isMetadataRecord = BitField & 0x01uL;
604     bool isBufferExtents =
605         (BitField >> 1) == 7; // BufferExtents record kind == 7
606     if (isMetadataRecord) {
607       RecordSize = 16;
608       if (auto E =
609               processFDRMetadataRecord(State, BitField, RecordExtractor,
610                                        RecordSize, Records, FileHeader.Version))
611         return E;
612     } else { // Process Function Record
613       RecordSize = 8;
614       if (auto E = processFDRFunctionRecord(State, BitField, RecordExtractor,
615                                             Records))
616         return E;
617     }
618 
619     // The BufferExtents record is technically not part of the buffer, so we
620     // don't count the size of that record against the buffer's actual size.
621     if (!isBufferExtents)
622       State.CurrentBufferConsumed += RecordSize;
623     assert(State.CurrentBufferConsumed <= State.CurrentBufferSize);
624     if ((FileHeader.Version == 2 || FileHeader.Version == 3) &&
625         State.CurrentBufferSize == State.CurrentBufferConsumed) {
626       // In Version 2 of the log, we don't need to scan to the end of the thread
627       // buffer if we've already consumed all the bytes we need to.
628       State.Expects = FDRState::Token::BUFFER_EXTENTS;
629       State.CurrentBufferSize = BufferSize;
630       State.CurrentBufferConsumed = 0;
631     }
632   }
633 
634   // Having iterated over everything we've been given, we've either consumed
635   // everything and ended up in the end state, or were told to skip the rest.
636   bool Finished = State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF &&
637                   State.CurrentBufferSize == State.CurrentBufferConsumed;
638   if ((State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF &&
639        State.Expects != FDRState::Token::BUFFER_EXTENTS) &&
640       !Finished)
641     return make_error<StringError>(
642         Twine("Encountered EOF with unexpected state expectation ") +
643             fdrStateToTwine(State.Expects) +
644             ". Remaining expected bytes in thread buffer total " +
645             Twine(State.CurrentBufferSize - State.CurrentBufferConsumed),
646         std::make_error_code(std::errc::executable_format_error));
647 
648   return Error::success();
649 }
650 
loadYAMLLog(StringRef Data,XRayFileHeader & FileHeader,std::vector<XRayRecord> & Records)651 Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
652                   std::vector<XRayRecord> &Records) {
653   YAMLXRayTrace Trace;
654   Input In(Data);
655   In >> Trace;
656   if (In.error())
657     return make_error<StringError>("Failed loading YAML Data.", In.error());
658 
659   FileHeader.Version = Trace.Header.Version;
660   FileHeader.Type = Trace.Header.Type;
661   FileHeader.ConstantTSC = Trace.Header.ConstantTSC;
662   FileHeader.NonstopTSC = Trace.Header.NonstopTSC;
663   FileHeader.CycleFrequency = Trace.Header.CycleFrequency;
664 
665   if (FileHeader.Version != 1)
666     return make_error<StringError>(
667         Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
668         std::make_error_code(std::errc::invalid_argument));
669 
670   Records.clear();
671   std::transform(Trace.Records.begin(), Trace.Records.end(),
672                  std::back_inserter(Records), [&](const YAMLXRayRecord &R) {
673                    return XRayRecord{R.RecordType, R.CPU, R.Type, R.FuncId,
674                                      R.TSC,        R.TId, R.PId,  R.CallArgs};
675                  });
676   return Error::success();
677 }
678 } // namespace
679 
loadTraceFile(StringRef Filename,bool Sort)680 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
681   int Fd;
682   if (auto EC = sys::fs::openFileForRead(Filename, Fd)) {
683     return make_error<StringError>(
684         Twine("Cannot read log from '") + Filename + "'", EC);
685   }
686 
687   uint64_t FileSize;
688   if (auto EC = sys::fs::file_size(Filename, FileSize)) {
689     return make_error<StringError>(
690         Twine("Cannot read log from '") + Filename + "'", EC);
691   }
692   if (FileSize < 4) {
693     return make_error<StringError>(
694         Twine("File '") + Filename + "' too small for XRay.",
695         std::make_error_code(std::errc::executable_format_error));
696   }
697 
698   // Map the opened file into memory and use a StringRef to access it later.
699   std::error_code EC;
700   sys::fs::mapped_file_region MappedFile(
701       Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
702   if (EC) {
703     return make_error<StringError>(
704         Twine("Cannot read log from '") + Filename + "'", EC);
705   }
706   auto Data = StringRef(MappedFile.data(), MappedFile.size());
707 
708   // Attempt to detect the file type using file magic. We have a slight bias
709   // towards the binary format, and we do this by making sure that the first 4
710   // bytes of the binary file is some combination of the following byte
711   // patterns: (observe the code loading them assumes they're little endian)
712   //
713   //   0x01 0x00 0x00 0x00 - version 1, "naive" format
714   //   0x01 0x00 0x01 0x00 - version 1, "flight data recorder" format
715   //   0x02 0x00 0x01 0x00 - version 2, "flight data recorder" format
716   //
717   // YAML files don't typically have those first four bytes as valid text so we
718   // try loading assuming YAML if we don't find these bytes.
719   //
720   // Only if we can't load either the binary or the YAML format will we yield an
721   // error.
722   StringRef Magic(MappedFile.data(), 4);
723   DataExtractor HeaderExtractor(Magic, true, 8);
724   uint32_t OffsetPtr = 0;
725   uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
726   uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
727 
728   enum BinaryFormatType { NAIVE_FORMAT = 0, FLIGHT_DATA_RECORDER_FORMAT = 1 };
729 
730   Trace T;
731   switch (Type) {
732   case NAIVE_FORMAT:
733     if (Version == 1 || Version == 2 || Version == 3) {
734       if (auto E = loadNaiveFormatLog(Data, T.FileHeader, T.Records))
735         return std::move(E);
736     } else {
737       return make_error<StringError>(
738           Twine("Unsupported version for Basic/Naive Mode logging: ") +
739               Twine(Version),
740           std::make_error_code(std::errc::executable_format_error));
741     }
742     break;
743   case FLIGHT_DATA_RECORDER_FORMAT:
744     if (Version == 1 || Version == 2 || Version == 3) {
745       if (auto E = loadFDRLog(Data, T.FileHeader, T.Records))
746         return std::move(E);
747     } else {
748       return make_error<StringError>(
749           Twine("Unsupported version for FDR Mode logging: ") + Twine(Version),
750           std::make_error_code(std::errc::executable_format_error));
751     }
752     break;
753   default:
754     if (auto E = loadYAMLLog(Data, T.FileHeader, T.Records))
755       return std::move(E);
756   }
757 
758   if (Sort)
759     std::stable_sort(T.Records.begin(), T.Records.end(),
760               [&](const XRayRecord &L, const XRayRecord &R) {
761                 return L.TSC < R.TSC;
762               });
763 
764   return std::move(T);
765 }
766