1 //===- GsymCreator.h --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H 10 #define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H 11 12 #include <functional> 13 #include <memory> 14 #include <mutex> 15 #include <string> 16 #include <thread> 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/DebugInfo/GSYM/FileEntry.h" 21 #include "llvm/DebugInfo/GSYM/FunctionInfo.h" 22 #include "llvm/DebugInfo/GSYM/Range.h" 23 #include "llvm/MC/StringTableBuilder.h" 24 #include "llvm/Support/Endian.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Support/Path.h" 27 28 namespace llvm { 29 30 namespace gsym { 31 class FileWriter; 32 33 /// GsymCreator is used to emit GSYM data to a stand alone file or section 34 /// within a file. 35 /// 36 /// The GsymCreator is designed to be used in 3 stages: 37 /// - Create FunctionInfo objects and add them 38 /// - Finalize the GsymCreator object 39 /// - Save to file or section 40 /// 41 /// The first stage involves creating FunctionInfo objects from another source 42 /// of information like compiler debug info metadata, DWARF or Breakpad files. 43 /// Any strings in the FunctionInfo or contained information, like InlineInfo 44 /// or LineTable objects, should get the string table offsets by calling 45 /// GsymCreator::insertString(...). Any file indexes that are needed should be 46 /// obtained by calling GsymCreator::insertFile(...). All of the function calls 47 /// in GsymCreator are thread safe. This allows multiple threads to create and 48 /// add FunctionInfo objects while parsing debug information. 
49 /// 50 /// Once all of the FunctionInfo objects have been added, the 51 /// GsymCreator::finalize(...) must be called prior to saving. This function 52 /// will sort the FunctionInfo objects, finalize the string table, and do any 53 /// other passes on the information needed to prepare the information to be 54 /// saved. 55 /// 56 /// Once the object has been finalized, it can be saved to a file or section. 57 /// 58 /// ENCODING 59 /// 60 /// GSYM files are designed to be memory mapped into a process as shared, read 61 /// only data, and used as is. 62 /// 63 /// The GSYM file format when in a stand alone file consists of: 64 /// - Header 65 /// - Address Table 66 /// - Function Info Offsets 67 /// - File Table 68 /// - String Table 69 /// - Function Info Data 70 /// 71 /// HEADER 72 /// 73 /// The header is fully described in "llvm/DebugInfo/GSYM/Header.h". 74 /// 75 /// ADDRESS TABLE 76 /// 77 /// The address table immediately follows the header in the file and consists 78 /// of Header.NumAddresses address offsets. These offsets are sorted and can be 79 /// binary searched for efficient lookups. Addresses in the address table are 80 /// stored as offsets from a 64 bit base address found in Header.BaseAddress. 81 /// This allows the address table to contain 8, 16, or 32 bit offsets. This allows 82 /// the address table to not require full 64 bit addresses for each address. 83 /// The resulting GSYM size is smaller and causes fewer pages to be touched 84 /// during address lookups when the address table is smaller. The size of the 85 /// address offsets in the address table is specified in the header in 86 /// Header.AddrOffSize. The first offset in the address table is aligned to 87 /// Header.AddrOffSize alignment to ensure efficient access when loaded into 88 /// memory. 
89 /// 90 /// FUNCTION INFO OFFSETS TABLE 91 /// 92 /// The function info offsets table immediately follows the address table and 93 /// consists of Header.NumAddresses 32 bit file offsets: one for each address 94 /// in the address table. This data is aligned to a 4 byte boundary. The 95 /// offsets in this table are the relative offsets from the start offset of the 96 /// GSYM header and point to the function info data for each address in the 97 /// address table. Keeping this data separate from the address table helps to 98 /// reduce the number of pages that are touched when address lookups occur on a 99 /// GSYM file. 100 /// 101 /// FILE TABLE 102 /// 103 /// The file table immediately follows the function info offsets table. The 104 /// encoding of the FileTable is: 105 /// 106 /// struct FileTable { 107 /// uint32_t Count; 108 /// FileEntry Files[]; 109 /// }; 110 /// 111 /// The file table starts with a 32 bit count of the number of files that are 112 /// used in all of the function info, followed by that number of FileEntry 113 /// structures. The file table is aligned to a 4 byte boundary. Each file in 114 /// the file table is represented with a FileEntry structure. 115 /// See "llvm/DebugInfo/GSYM/FileEntry.h" for details. 116 /// 117 /// STRING TABLE 118 /// 119 /// The string table follows the file table in stand alone GSYM files and 120 /// contains all strings for everything contained in the GSYM file. Any string 121 /// data should be added to the string table and any references to strings 122 /// inside GSYM information must be stored as 32 bit string table offsets into 123 /// this string table. The string table always starts with an empty string at 124 /// offset zero and is followed by any strings needed by the GSYM information. 125 /// The start of the string table is not aligned to any boundary. 
126 /// 127 /// FUNCTION INFO DATA 128 /// 129 /// The function info data is the payload that contains information about the 130 /// address that is being looked up. It contains all of the encoded 131 /// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an 132 /// entry in the Function Info Offsets Table. For details on the exact encoding 133 /// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h". 134 class GsymCreator { 135 // Private member variables require Mutex protections 136 mutable std::recursive_mutex Mutex; 137 std::vector<FunctionInfo> Funcs; 138 StringTableBuilder StrTab; 139 StringSet<> StringStorage; 140 DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex; 141 std::vector<llvm::gsym::FileEntry> Files; 142 std::vector<uint8_t> UUID; 143 Optional<AddressRanges> ValidTextRanges; 144 AddressRanges Ranges; 145 llvm::Optional<uint64_t> BaseAddress; 146 bool Finalized = false; 147 148 public: 149 150 GsymCreator(); 151 152 /// Save a GSYM file to a stand alone file. 153 /// 154 /// \param Path The file path to save the GSYM file to. 155 /// \param ByteOrder The endianness to use when saving the file. 156 /// \returns An error object that indicates success or failure of the save. 157 llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const; 158 159 /// Encode a GSYM into the file writer stream at the current position. 160 /// 161 /// \param O The stream to save the binary data to 162 /// \returns An error object that indicates success or failure of the save. 163 llvm::Error encode(FileWriter &O) const; 164 165 /// Insert a string into the GSYM string table. 166 /// 167 /// All strings used by GSYM files must be uniqued by adding them to this 168 /// string pool and using the returned offset for any string values. 169 /// 170 /// \param S The string to insert into the string table. 171 /// \param Copy If true, then make a backing copy of the string. 
If false, 172 /// the string is owned by another object that will stay around 173 /// long enough for the GsymCreator to save the GSYM file. 174 /// \returns The unique 32 bit offset into the string table. 175 uint32_t insertString(StringRef S, bool Copy = true); 176 177 /// Insert a file into this GSYM creator. 178 /// 179 /// Inserts a file by adding a FileEntry into the "Files" member variable if 180 /// the file has not already been added. The file path is split into 181 /// directory and filename which are both added to the string table. This 182 /// allows paths to be stored efficiently by reusing the directories that are 183 /// common between multiple files. 184 /// 185 /// \param Path The path to the file to insert. 186 /// \param Style The path style for the "Path" parameter. 187 /// \returns The unique file index for the inserted file. 188 uint32_t insertFile(StringRef Path, 189 sys::path::Style Style = sys::path::Style::native); 190 191 /// Add a function info to this GSYM creator. 192 /// 193 /// All information in the FunctionInfo object must use the 194 /// GsymCreator::insertString(...) function when creating string table 195 /// offsets for names and other strings. 196 /// 197 /// \param FI The function info object to emplace into our functions list. 198 void addFunctionInfo(FunctionInfo &&FI); 199 200 /// Finalize the data in the GSYM creator prior to saving the data out. 201 /// 202 /// Finalize must be called after all FunctionInfo objects have been added 203 /// and before GsymCreator::save() is called. 204 /// 205 /// \param OS Output stream to report duplicate function infos, overlapping 206 /// function infos, and function infos that were merged or removed. 207 /// \returns An error object that indicates success or failure of the 208 /// finalize. 209 llvm::Error finalize(llvm::raw_ostream &OS); 210 211 /// Set the UUID value. 212 /// 213 /// \param UUIDBytes The new UUID bytes. 
setUUID(llvm::ArrayRef<uint8_t> UUIDBytes)214 void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) { 215 UUID.assign(UUIDBytes.begin(), UUIDBytes.end()); 216 } 217 218 /// Thread safe iteration over all function infos. 219 /// 220 /// \param Callback A callback function that will get called with each 221 /// FunctionInfo. If the callback returns false, stop iterating. 222 void forEachFunctionInfo( 223 std::function<bool(FunctionInfo &)> const &Callback); 224 225 /// Thread safe const iteration over all function infos. 226 /// 227 /// \param Callback A callback function that will get called with each 228 /// FunctionInfo. If the callback returns false, stop iterating. 229 void forEachFunctionInfo( 230 std::function<bool(const FunctionInfo &)> const &Callback) const; 231 232 /// Get the current number of FunctionInfo objects contained in this 233 /// object. 234 size_t getNumFunctionInfos() const; 235 236 /// Check if an address has already been added as a function info. 237 /// 238 /// FunctionInfo data can come from many sources: debug info, symbol tables, 239 /// exception information, and more. Symbol tables should be added after 240 /// debug info and can use this function to see if a symbol's start address 241 /// has already been added to the GsymReader. Calling this before adding 242 /// a function info from a source other than debug info avoids clients adding 243 /// many redundant FunctionInfo objects from many sources only for them to be 244 /// removed during the finalize() call. 245 bool hasFunctionInfoForAddress(uint64_t Addr) const; 246 247 /// Set valid .text address ranges that all functions must be contained in. SetValidTextRanges(AddressRanges & TextRanges)248 void SetValidTextRanges(AddressRanges &TextRanges) { 249 ValidTextRanges = TextRanges; 250 } 251 252 /// Get the valid text ranges. 
GetValidTextRanges()253 const Optional<AddressRanges> GetValidTextRanges() const { 254 return ValidTextRanges; 255 } 256 257 /// Check if an address is a valid code address. 258 /// 259 /// Any functions whose addresses do not exist within these function bounds 260 /// will not be converted into the final GSYM. This allows the object file 261 /// to figure out the valid file address ranges of all the code sections 262 /// and ensure we don't add invalid functions to the final output. Many 263 /// linkers have issues when dead stripping functions from DWARF debug info 264 /// where they set the DW_AT_low_pc to zero, but newer DWARF has the 265 /// DW_AT_high_pc as an offset from the DW_AT_low_pc and these size 266 /// attributes have no relocations that can be applied. This results in DWARF 267 /// where many functions have an DW_AT_low_pc of zero and a valid offset size 268 /// for DW_AT_high_pc. If we extract all valid ranges from an object file 269 /// that are marked with executable permissions, we can properly ensure that 270 /// these functions are removed. 271 /// 272 /// \param Addr An address to check. 273 /// 274 /// \returns True if the address is in the valid text ranges or if no valid 275 /// text ranges have been set, false otherwise. 276 bool IsValidTextAddress(uint64_t Addr) const; 277 278 /// Set the base address to use for the GSYM file. 279 /// 280 /// Setting the base address to use for the GSYM file. Object files typically 281 /// get loaded from a base address when the OS loads them into memory. Using 282 /// GSYM files for symbolication becomes easier if the base address in the 283 /// GSYM header is the same address as it allows addresses to be easily slid 284 /// and allows symbolication without needing to find the original base 285 /// address in the original object file. 286 /// 287 /// \param Addr The address to use as the base address of the GSYM file 288 /// when it is saved to disk. 
setBaseAddress(uint64_t Addr)289 void setBaseAddress(uint64_t Addr) { 290 BaseAddress = Addr; 291 } 292 }; 293 294 } // namespace gsym 295 } // namespace llvm 296 297 #endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H 298