// -*- mode: c++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// stabs_reader.h: Define StabsReader, a parser for STABS debugging
// information.
A description of the STABS debugging format can be 35 // found at: 36 // 37 // http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html 38 // 39 // The comments here assume you understand the format. 40 // 41 // This parser can handle big-endian and little-endian data, and the symbol 42 // values may be either 32 or 64 bits long. It handles both STABS in 43 // sections (as used on Linux) and STABS appearing directly in an 44 // a.out-like symbol table (as used in Darwin OS X Mach-O files). 45 46 #ifndef COMMON_STABS_READER_H__ 47 #define COMMON_STABS_READER_H__ 48 49 #include <stddef.h> 50 #include <stdint.h> 51 52 #ifdef HAVE_CONFIG_H 53 #include <config.h> 54 #endif 55 56 #ifdef HAVE_A_OUT_H 57 #include <a.out.h> 58 #endif 59 #ifdef HAVE_MACH_O_NLIST_H 60 #include <mach-o/nlist.h> 61 #endif 62 63 #include <string> 64 #include <vector> 65 66 #include "common/byte_cursor.h" 67 #include "common/using_std_string.h" 68 69 namespace google_breakpad { 70 71 class StabsHandler; 72 73 class StabsReader { 74 public: 75 // Create a reader for the STABS debug information whose .stab section is 76 // being traversed by ITERATOR, and whose .stabstr section is referred to 77 // by STRINGS. The reader will call the member functions of HANDLER to 78 // report the information it finds, when the reader's 'Process' member 79 // function is called. 80 // 81 // BIG_ENDIAN should be true if the entries in the .stab section are in 82 // big-endian form, or false if they are in little-endian form. 83 // 84 // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value' 85 // field in each entry in bytes. 86 // 87 // UNITIZED should be true if the STABS data is stored in units with 88 // N_UNDF headers. This is usually the case for STABS stored in sections, 89 // like .stab/.stabstr, and usually not the case for STABS stored in the 90 // actual symbol table; UNITIZED should be true when parsing Linux stabs, 91 // false when parsing Mac OS X STABS. 
For details, see: 92 // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html 93 // 94 // Note that, in ELF, the .stabstr section should be found using the 95 // 'sh_link' field of the .stab section header, not by name. 96 StabsReader(const uint8_t *stab, size_t stab_size, 97 const uint8_t *stabstr, size_t stabstr_size, 98 bool big_endian, size_t value_size, bool unitized, 99 StabsHandler *handler); 100 101 // Process the STABS data, calling the handler's member functions to 102 // report what we find. While the handler functions return true, 103 // continue to process until we reach the end of the section. If we 104 // processed the entire section and all handlers returned true, 105 // return true. If any handler returned false, return false. 106 // 107 // This is only meant to be called once per StabsReader instance; 108 // resuming a prior processing pass that stopped abruptly isn't supported. 109 bool Process(); 110 111 private: 112 113 // An class for walking arrays of STABS entries. This isolates the main 114 // STABS reader from the exact format (size; endianness) of the entries 115 // themselves. 116 class EntryIterator { 117 public: 118 // The contents of a STABS entry, adjusted for the host's endianness, 119 // word size, 'struct nlist' layout, and so on. 120 struct Entry { 121 // True if this iterator has reached the end of the entry array. When 122 // this is set, the other members of this structure are not valid. 123 bool at_end; 124 125 // The number of this entry within the list. 126 size_t index; 127 128 // The current entry's name offset. This is the offset within the 129 // current compilation unit's strings, as establish by the N_UNDF entries. 130 size_t name_offset; 131 132 // The current entry's type, 'other' field, descriptor, and value. 133 unsigned char type; 134 unsigned char other; 135 short descriptor; 136 uint64_t value; 137 }; 138 139 // Create a EntryIterator walking the entries in BUFFER. 
Treat the 140 // entries as big-endian if BIG_ENDIAN is true, as little-endian 141 // otherwise. Assume each entry has a 'value' field whose size is 142 // VALUE_SIZE. 143 // 144 // This would not be terribly clean to extend to other format variations, 145 // but it's enough to handle Linux and Mac, and we'd like STABS to die 146 // anyway. 147 // 148 // For the record: on Linux, STABS entry values are always 32 bits, 149 // regardless of the architecture address size (don't ask me why); on 150 // Mac, they are 32 or 64 bits long. Oddly, the section header's entry 151 // size for a Linux ELF .stab section varies according to the ELF class 152 // from 12 to 20 even as the actual entries remain unchanged. 153 EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size); 154 155 // Move to the next entry. This function's behavior is undefined if 156 // at_end() is true when it is called. 157 EntryIterator &operator++() { Fetch(); entry_.index++; return *this; } 158 159 // Dereferencing this iterator produces a reference to an Entry structure 160 // that holds the current entry's values. The entry is owned by this 161 // EntryIterator, and will be invalidated at the next call to operator++. 162 const Entry &operator*() const { return entry_; } 163 const Entry *operator->() const { return &entry_; } 164 165 private: 166 // Read the STABS entry at cursor_, and set entry_ appropriately. 167 void Fetch(); 168 169 // The size of entries' value field, in bytes. 170 size_t value_size_; 171 172 // A byte cursor traversing buffer_. 173 ByteCursor cursor_; 174 175 // Values for the entry this iterator refers to. 176 Entry entry_; 177 }; 178 179 // A source line, saved to be reported later. 180 struct Line { 181 uint64_t address; 182 const char *filename; 183 int number; 184 }; 185 186 // Return the name of the current symbol. 187 const char *SymbolString(); 188 189 // Process a compilation unit starting at symbol_. 
Return true 190 // to continue processing, or false to abort. 191 bool ProcessCompilationUnit(); 192 193 // Process a function in current_source_file_ starting at symbol_. 194 // Return true to continue processing, or false to abort. 195 bool ProcessFunction(); 196 197 // Process an exported function symbol. 198 // Return true to continue processing, or false to abort. 199 bool ProcessExtern(); 200 201 // The STABS entries being parsed. 202 ByteBuffer entries_; 203 204 // The string section to which the entries refer. 205 ByteBuffer strings_; 206 207 // The iterator walking the STABS entries. 208 EntryIterator iterator_; 209 210 // True if the data is "unitized"; see the explanation in the comment for 211 // StabsReader::StabsReader. 212 bool unitized_; 213 214 StabsHandler *handler_; 215 216 // The offset of the current compilation unit's strings within stabstr_. 217 size_t string_offset_; 218 219 // The value string_offset_ should have for the next compilation unit, 220 // as established by N_UNDF entries. 221 size_t next_cu_string_offset_; 222 223 // The current source file name. 224 const char *current_source_file_; 225 226 // Mac OS X STABS place SLINE records before functions; we accumulate a 227 // vector of these until we see the FUN record, and then report them 228 // after the StartFunction call. 229 std::vector<Line> queued_lines_; 230 }; 231 232 // Consumer-provided callback structure for the STABS reader. Clients 233 // of the STABS reader provide an instance of this structure. The 234 // reader then invokes the member functions of that instance to report 235 // the information it finds. 236 // 237 // The default definitions of the member functions do nothing, and return 238 // true so processing will continue. 
239 class StabsHandler { 240 public: StabsHandler()241 StabsHandler() { } ~StabsHandler()242 virtual ~StabsHandler() { } 243 244 // Some general notes about the handler callback functions: 245 246 // Processing proceeds until the end of the .stabs section, or until 247 // one of these functions returns false. 248 249 // The addresses given are as reported in the STABS info, without 250 // regard for whether the module may be loaded at different 251 // addresses at different times (a shared library, say). When 252 // processing STABS from an ELF shared library, the addresses given 253 // all assume the library is loaded at its nominal load address. 254 // They are *not* offsets from the nominal load address. If you 255 // want offsets, you must subtract off the library's nominal load 256 // address. 257 258 // The arguments to these functions named FILENAME are all 259 // references to strings stored in the .stabstr section. Because 260 // both the Linux and Solaris linkers factor out duplicate strings 261 // from the .stabstr section, the consumer can assume that if two 262 // FILENAME values are different addresses, they represent different 263 // file names. 264 // 265 // Thus, it's safe to use (say) std::map<char *, ...>, which does 266 // string address comparisons, not string content comparisons. 267 // Since all the strings are in same array of characters --- the 268 // .stabstr section --- comparing their addresses produces 269 // predictable, if not lexicographically meaningful, results. 270 271 // Begin processing a compilation unit whose main source file is 272 // named FILENAME, and whose base address is ADDRESS. If 273 // BUILD_DIRECTORY is non-NULL, it is the name of the build 274 // directory in which the compilation occurred. 
StartCompilationUnit(const char * filename,uint64_t address,const char * build_directory)275 virtual bool StartCompilationUnit(const char *filename, uint64_t address, 276 const char *build_directory) { 277 return true; 278 } 279 280 // Finish processing the compilation unit. If ADDRESS is non-zero, 281 // it is the ending address of the compilation unit. If ADDRESS is 282 // zero, then the compilation unit's ending address is not 283 // available, and the consumer must infer it by other means. EndCompilationUnit(uint64_t address)284 virtual bool EndCompilationUnit(uint64_t address) { return true; } 285 286 // Begin processing a function named NAME, whose starting address is 287 // ADDRESS. This function belongs to the compilation unit that was 288 // most recently started but not ended. 289 // 290 // Note that, unlike filenames, NAME is not a pointer into the 291 // .stabstr section; this is because the name as it appears in the 292 // STABS data is followed by type information. The value passed to 293 // StartFunction is the function name alone. 294 // 295 // In languages that use name mangling, like C++, NAME is mangled. StartFunction(const string & name,uint64_t address)296 virtual bool StartFunction(const string &name, uint64_t address) { 297 return true; 298 } 299 300 // Finish processing the function. If ADDRESS is non-zero, it is 301 // the ending address for the function. If ADDRESS is zero, then 302 // the function's ending address is not available, and the consumer 303 // must infer it by other means. EndFunction(uint64_t address)304 virtual bool EndFunction(uint64_t address) { return true; } 305 306 // Report that the code at ADDRESS is attributable to line NUMBER of 307 // the source file named FILENAME. The caller must infer the ending 308 // address of the line. 
Line(uint64_t address,const char * filename,int number)309 virtual bool Line(uint64_t address, const char *filename, int number) { 310 return true; 311 } 312 313 // Report that an exported function NAME is present at ADDRESS. 314 // The size of the function is unknown. Extern(const string & name,uint64_t address)315 virtual bool Extern(const string &name, uint64_t address) { 316 return true; 317 } 318 319 // Report a warning. FORMAT is a printf-like format string, 320 // specifying how to format the subsequent arguments. 321 virtual void Warning(const char *format, ...) = 0; 322 }; 323 324 } // namespace google_breakpad 325 326 #endif // COMMON_STABS_READER_H__ 327