//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineEntry.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"

#include <algorithm>
#include <inttypes.h>
#include <iterator>
#include <memory>
#include <stdint.h>
#include <string>
#include <vector>

namespace llvm {
class MemoryBuffer;
class raw_ostream;

namespace gsym {

/// GsymReader is used to read GSYM data from a file or buffer.
///
/// This class is optimized for very quick lookups when the endianness matches
/// the host system. The Header, address table, address info offsets, and file
/// table is designed to be mmap'ed as read only into memory and used without
/// any parsing needed. If the endianness doesn't match, we swap these objects
/// and tables into GsymReader::SwappedData and then point our header and
/// ArrayRefs to this swapped internal data.
///
/// GsymReader objects must use one of the static functions to create an
/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
46 47 class GsymReader { 48 GsymReader(std::unique_ptr<MemoryBuffer> Buffer); 49 llvm::Error parse(); 50 51 std::unique_ptr<MemoryBuffer> MemBuffer; 52 StringRef GsymBytes; 53 llvm::support::endianness Endian; 54 const Header *Hdr = nullptr; 55 ArrayRef<uint8_t> AddrOffsets; 56 ArrayRef<uint32_t> AddrInfoOffsets; 57 ArrayRef<FileEntry> Files; 58 StringTable StrTab; 59 /// When the GSYM file's endianness doesn't match the host system then 60 /// we must decode all data structures that need to be swapped into 61 /// local storage and set point the ArrayRef objects above to these swapped 62 /// copies. 63 struct SwappedData { 64 Header Hdr; 65 std::vector<uint8_t> AddrOffsets; 66 std::vector<uint32_t> AddrInfoOffsets; 67 std::vector<FileEntry> Files; 68 }; 69 std::unique_ptr<SwappedData> Swap; 70 71 public: 72 GsymReader(GsymReader &&RHS); 73 ~GsymReader(); 74 75 /// Construct a GsymReader from a file on disk. 76 /// 77 /// \param Path The file path the GSYM file to read. 78 /// \returns An expected GsymReader that contains the object or an error 79 /// object that indicates reason for failing to read the GSYM. 80 static llvm::Expected<GsymReader> openFile(StringRef Path); 81 82 /// Construct a GsymReader from a buffer. 83 /// 84 /// \param Bytes A set of bytes that will be copied and owned by the 85 /// returned object on success. 86 /// \returns An expected GsymReader that contains the object or an error 87 /// object that indicates reason for failing to read the GSYM. 88 static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes); 89 90 /// Access the GSYM header. 91 /// \returns A native endian version of the GSYM header. 92 const Header &getHeader() const; 93 94 /// Get the full function info for an address. 95 /// 96 /// This should be called when a client will store a copy of the complete 97 /// FunctionInfo for a given address. For one off lookups, use the lookup() 98 /// function below. 
99 /// 100 /// Symbolication server processes might want to parse the entire function 101 /// info for a given address and cache it if the process stays around to 102 /// service many symbolication addresses, like for parsing profiling 103 /// information. 104 /// 105 /// \param Addr A virtual address from the orignal object file to lookup. 106 /// 107 /// \returns An expected FunctionInfo that contains the function info object 108 /// or an error object that indicates reason for failing to lookup the 109 /// address. 110 llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const; 111 112 /// Lookup an address in the a GSYM. 113 /// 114 /// Lookup just the information needed for a specific address \a Addr. This 115 /// function is faster that calling getFunctionInfo() as it will only return 116 /// information that pertains to \a Addr and allows the parsing to skip any 117 /// extra information encoded for other addresses. For example the line table 118 /// parsing can stop when a matching LineEntry has been fouhnd, and the 119 /// InlineInfo can stop parsing early once a match has been found and also 120 /// skip information that doesn't match. This avoids memory allocations and 121 /// is much faster for lookups. 122 /// 123 /// \param Addr A virtual address from the orignal object file to lookup. 124 /// \returns An expected LookupResult that contains only the information 125 /// needed for the current address, or an error object that indicates reason 126 /// for failing to lookup the address. 127 llvm::Expected<LookupResult> lookup(uint64_t Addr) const; 128 129 /// Get a string from the string table. 130 /// 131 /// \param Offset The string table offset for the string to retrieve. 132 /// \returns The string from the strin table. getString(uint32_t Offset)133 StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } 134 135 /// Get the a file entry for the suppplied file index. 
136 /// 137 /// Used to convert any file indexes in the FunctionInfo data back into 138 /// files. This function can be used for iteration, but is more commonly used 139 /// for random access when doing lookups. 140 /// 141 /// \param Index An index into the file table. 142 /// \returns An optional FileInfo that will be valid if the file index is 143 /// valid, or llvm::None if the file index is out of bounds, getFile(uint32_t Index)144 Optional<FileEntry> getFile(uint32_t Index) const { 145 if (Index < Files.size()) 146 return Files[Index]; 147 return llvm::None; 148 } 149 150 /// Dump the entire Gsym data contained in this object. 151 /// 152 /// \param OS The output stream to dump to. 153 void dump(raw_ostream &OS); 154 155 /// Dump a FunctionInfo object. 156 /// 157 /// This function will convert any string table indexes and file indexes 158 /// into human readable format. 159 /// 160 /// \param OS The output stream to dump to. 161 /// 162 /// \param FI The object to dump. 163 void dump(raw_ostream &OS, const FunctionInfo &FI); 164 165 /// Dump a LineTable object. 166 /// 167 /// This function will convert any string table indexes and file indexes 168 /// into human readable format. 169 /// 170 /// 171 /// \param OS The output stream to dump to. 172 /// 173 /// \param LT The object to dump. 174 void dump(raw_ostream &OS, const LineTable <); 175 176 /// Dump a InlineInfo object. 177 /// 178 /// This function will convert any string table indexes and file indexes 179 /// into human readable format. 180 /// 181 /// \param OS The output stream to dump to. 182 /// 183 /// \param II The object to dump. 184 /// 185 /// \param Indent The indentation as number of spaces. Used for recurive 186 /// dumping. 187 void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0); 188 189 /// Dump a FileEntry object. 190 /// 191 /// This function will convert any string table indexes into human readable 192 /// format. 
193 /// 194 /// \param OS The output stream to dump to. 195 /// 196 /// \param FE The object to dump. 197 void dump(raw_ostream &OS, Optional<FileEntry> FE); 198 199 /// Get the number of addresses in this Gsym file. getNumAddresses()200 uint32_t getNumAddresses() const { 201 return Hdr->NumAddresses; 202 } 203 204 /// Gets an address from the address table. 205 /// 206 /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. 207 /// 208 /// \param Index A index into the address table. 209 /// \returns A resolved virtual address for adddress in the address table 210 /// or llvm::None if Index is out of bounds. 211 Optional<uint64_t> getAddress(size_t Index) const; 212 213 protected: 214 215 /// Get an appropriate address info offsets array. 216 /// 217 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 218 /// byte offsets from the The gsym::Header::BaseAddress. The table is stored 219 /// internally as a array of bytes that are in the correct endianness. When 220 /// we access this table we must get an array that matches those sizes. This 221 /// templatized helper function is used when accessing address offsets in the 222 /// AddrOffsets member variable. 223 /// 224 /// \returns An ArrayRef of an appropriate address offset size. 225 template <class T> ArrayRef<T> getAddrOffsets()226 getAddrOffsets() const { 227 return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()), 228 AddrOffsets.size()/sizeof(T)); 229 } 230 231 /// Get an appropriate address from the address table. 232 /// 233 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 234 /// byte address offsets from the The gsym::Header::BaseAddress. The table is 235 /// stored internally as a array of bytes that are in the correct endianness. 236 /// In order to extract an address from the address table we must access the 237 /// address offset using the correct size and then add it to the BaseAddress 238 /// in the header. 
239 /// 240 /// \param Index An index into the AddrOffsets array. 241 /// \returns An virtual address that matches the original object file for the 242 /// address as the specified index, or llvm::None if Index is out of bounds. 243 template <class T> Optional<uint64_t> addressForIndex(size_t Index)244 addressForIndex(size_t Index) const { 245 ArrayRef<T> AIO = getAddrOffsets<T>(); 246 if (Index < AIO.size()) 247 return AIO[Index] + Hdr->BaseAddress; 248 return llvm::None; 249 } 250 /// Lookup an address offset in the AddrOffsets table. 251 /// 252 /// Given an address offset, look it up using a binary search of the 253 /// AddrOffsets table. 254 /// 255 /// \param AddrOffset An address offset, that has already been computed by 256 /// subtracting the gsym::Header::BaseAddress. 257 /// \returns The matching address offset index. This index will be used to 258 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 259 template <class T> getAddressOffsetIndex(const uint64_t AddrOffset)260 llvm::Optional<uint64_t> getAddressOffsetIndex(const uint64_t AddrOffset) const { 261 ArrayRef<T> AIO = getAddrOffsets<T>(); 262 const auto Begin = AIO.begin(); 263 const auto End = AIO.end(); 264 auto Iter = std::lower_bound(Begin, End, AddrOffset); 265 // Watch for addresses that fall between the gsym::Header::BaseAddress and 266 // the first address offset. 267 if (Iter == Begin && AddrOffset < *Begin) 268 return llvm::None; 269 if (Iter == End || AddrOffset < *Iter) 270 --Iter; 271 return std::distance(Begin, Iter); 272 } 273 274 /// Create a GSYM from a memory buffer. 275 /// 276 /// Called by both openFile() and copyBuffer(), this function does all of the 277 /// work of parsing the GSYM file and returning an error. 278 /// 279 /// \param MemBuffer A memory buffer that will transfer ownership into the 280 /// GsymReader. 
281 /// \returns An expected GsymReader that contains the object or an error 282 /// object that indicates reason for failing to read the GSYM. 283 static llvm::Expected<llvm::gsym::GsymReader> 284 create(std::unique_ptr<MemoryBuffer> &MemBuffer); 285 286 287 /// Given an address, find the address index. 288 /// 289 /// Binary search the address table and find the matching address index. 290 /// 291 /// \param Addr A virtual address that matches the original object file 292 /// to lookup. 293 /// \returns An index into the address table. This index can be used to 294 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 295 /// Returns an error if the address isn't in the GSYM with details of why. 296 Expected<uint64_t> getAddressIndex(const uint64_t Addr) const; 297 298 /// Given an address index, get the offset for the FunctionInfo. 299 /// 300 /// Looking up an address is done by finding the corresponding address 301 /// index for the address. This index is then used to get the offset of the 302 /// FunctionInfo data that we will decode using this function. 303 /// 304 /// \param Index An index into the address table. 305 /// \returns An optional GSYM data offset for the offset of the FunctionInfo 306 /// that needs to be decoded. 307 Optional<uint64_t> getAddressInfoOffset(size_t Index) const; 308 }; 309 310 } // namespace gsym 311 } // namespace llvm 312 313 #endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H 314