// -*- mode: C++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// macho_reader.h: A class for parsing Mach-O files.
35 36 #ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_ 37 #define BREAKPAD_COMMON_MAC_MACHO_READER_H_ 38 39 #include <mach-o/loader.h> 40 #include <mach-o/fat.h> 41 #include <stdint.h> 42 #include <stdlib.h> 43 #include <unistd.h> 44 45 #include <map> 46 #include <string> 47 #include <vector> 48 49 #include "common/byte_cursor.h" 50 #include "common/mac/super_fat_arch.h" 51 52 namespace google_breakpad { 53 namespace mach_o { 54 55 using std::map; 56 using std::string; 57 using std::vector; 58 59 // The Mac headers don't specify particular types for these groups of 60 // constants, but defining them here provides some documentation 61 // value. We also give them the same width as the fields in which 62 // they appear, which makes them a bit easier to use with ByteCursors. 63 typedef uint32_t Magic; 64 typedef uint32_t FileType; 65 typedef uint32_t FileFlags; 66 typedef uint32_t LoadCommandType; 67 typedef uint32_t SegmentFlags; 68 typedef uint32_t SectionFlags; 69 70 // A parser for fat binary files, used to store universal binaries. 71 // When applied to a (non-fat) Mach-O file, this behaves as if the 72 // file were a fat file containing a single object file. 73 class FatReader { 74 public: 75 76 // A class for reporting errors found while parsing fat binary files. The 77 // default definitions of these methods print messages to stderr. 78 class Reporter { 79 public: 80 // Create a reporter that attributes problems to |filename|. Reporter(const string & filename)81 explicit Reporter(const string &filename) : filename_(filename) { } 82 ~Reporter()83 virtual ~Reporter() { } 84 85 // The data does not begin with a fat binary or Mach-O magic number. 86 // This is a fatal error. 87 virtual void BadHeader(); 88 89 // The Mach-O fat binary file ends abruptly, without enough space 90 // to contain an object file it claims is present. 
91 virtual void MisplacedObjectFile(); 92 93 // The file ends abruptly: either it is not large enough to hold a 94 // complete header, or the header implies that contents are present 95 // beyond the actual end of the file. 96 virtual void TooShort(); 97 98 private: 99 // The filename to which the reader should attribute problems. 100 string filename_; 101 }; 102 103 // Create a fat binary file reader that uses |reporter| to report problems. FatReader(Reporter * reporter)104 explicit FatReader(Reporter *reporter) : reporter_(reporter) { } 105 106 // Read the |size| bytes at |buffer| as a fat binary file. On success, 107 // return true; on failure, report the problem to reporter_ and return 108 // false. 109 // 110 // If the data is a plain Mach-O file, rather than a fat binary file, 111 // then the reader behaves as if it had found a fat binary file whose 112 // single object file is the Mach-O file. 113 bool Read(const uint8_t *buffer, size_t size); 114 115 // Return an array of 'SuperFatArch' structures describing the 116 // object files present in this fat binary file. Set |size| to the 117 // number of elements in the array. 118 // 119 // Assuming Read returned true, the entries are validated: it is safe to 120 // assume that the offsets and sizes in each SuperFatArch refer to subranges 121 // of the bytes passed to Read. 122 // 123 // If there are no object files in this fat binary, then this 124 // function can return NULL. 125 // 126 // The array is owned by this FatReader instance; it will be freed when 127 // this FatReader is destroyed. 128 // 129 // This function returns a C-style array instead of a vector to make it 130 // possible to use the result with OS X functions like NXFindBestFatArch, 131 // so that the symbol dumper will behave consistently with other OS X 132 // utilities that work with fat binaries. 
object_files(size_t * count)133 const SuperFatArch* object_files(size_t *count) const { 134 *count = object_files_.size(); 135 if (object_files_.size() > 0) 136 return &object_files_[0]; 137 return NULL; 138 } 139 140 private: 141 // We use this to report problems parsing the file's contents. (WEAK) 142 Reporter *reporter_; 143 144 // The contents of the fat binary or Mach-O file we're parsing. We do not 145 // own the storage it refers to. 146 ByteBuffer buffer_; 147 148 // The magic number of this binary, in host byte order. 149 Magic magic_; 150 151 // The list of object files in this binary. 152 // object_files_.size() == fat_header.nfat_arch 153 vector<SuperFatArch> object_files_; 154 }; 155 156 // A segment in a Mach-O file. All these fields have been byte-swapped as 157 // appropriate for use by the executing architecture. 158 struct Segment { 159 // The ByteBuffers below point into the bytes passed to the Reader that 160 // created this Segment. 161 162 ByteBuffer section_list; // This segment's section list. 163 ByteBuffer contents; // This segment's contents. 164 165 // This segment's name. 166 string name; 167 168 // The address at which this segment should be loaded in memory. If 169 // bits_64 is false, only the bottom 32 bits of this value are valid. 170 uint64_t vmaddr; 171 172 // The size of this segment when loaded into memory. This may be larger 173 // than contents.Size(), in which case the extra area will be 174 // initialized with zeros. If bits_64 is false, only the bottom 32 bits 175 // of this value are valid. 176 uint64_t vmsize; 177 178 // The file offset and size of the segment in the Mach-O image. 179 uint64_t fileoff; 180 uint64_t filesize; 181 182 // The maximum and initial VM protection of this segment's contents. 183 uint32_t maxprot; 184 uint32_t initprot; 185 186 // The number of sections in section_list. 187 uint32_t nsects; 188 189 // Flags describing this segment, from SegmentFlags. 
190 uint32_t flags; 191 192 // True if this is a 64-bit section; false if it is a 32-bit section. 193 bool bits_64; 194 }; 195 196 // A section in a Mach-O file. All these fields have been byte-swapped as 197 // appropriate for use by the executing architecture. 198 struct Section { 199 // This section's contents. This points into the bytes passed to the 200 // Reader that created this Section. 201 ByteBuffer contents; 202 203 // This section's name. 204 string section_name; // section[_64].sectname 205 // The name of the segment this section belongs to. 206 string segment_name; // section[_64].segname 207 208 // The address at which this section's contents should be loaded in 209 // memory. If bits_64 is false, only the bottom 32 bits of this value 210 // are valid. 211 uint64_t address; 212 213 // The contents of this section should be loaded into memory at an 214 // address which is a multiple of (two raised to this power). 215 uint32_t align; 216 217 // Flags from SectionFlags describing the section's contents. 218 uint32_t flags; 219 220 // We don't support reading relocations yet. 221 222 // True if this is a 64-bit section; false if it is a 32-bit section. 223 bool bits_64; 224 }; 225 226 // A map from section names to Sections. 227 typedef map<string, Section> SectionMap; 228 229 // A reader for a Mach-O file. 230 // 231 // This does not handle fat binaries; see FatReader above. FatReader 232 // provides a friendly interface for parsing data that could be either a 233 // fat binary or a Mach-O file. 234 class Reader { 235 public: 236 237 // A class for reporting errors found while parsing Mach-O files. The 238 // default definitions of these member functions print messages to 239 // stderr. 240 class Reporter { 241 public: 242 // Create a reporter that attributes problems to |filename|. 
Reporter(const string & filename)243 explicit Reporter(const string &filename) : filename_(filename) { } ~Reporter()244 virtual ~Reporter() { } 245 246 // Reporter functions for fatal errors return void; the reader will 247 // definitely return an error to its caller after calling them 248 249 // The data does not begin with a Mach-O magic number, or the magic 250 // number does not match the expected value for the cpu architecture. 251 // This is a fatal error. 252 virtual void BadHeader(); 253 254 // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|) 255 // does not match the expected CPU architecture 256 // (|expected_cpu_type|, |expected_cpu_subtype|). 257 virtual void CPUTypeMismatch(cpu_type_t cpu_type, 258 cpu_subtype_t cpu_subtype, 259 cpu_type_t expected_cpu_type, 260 cpu_subtype_t expected_cpu_subtype); 261 262 // The file ends abruptly: either it is not large enough to hold a 263 // complete header, or the header implies that contents are present 264 // beyond the actual end of the file. 265 virtual void HeaderTruncated(); 266 267 // The file's load command region, as given in the Mach-O header, is 268 // too large for the file. 269 virtual void LoadCommandRegionTruncated(); 270 271 // The file's Mach-O header claims the file contains |claimed| load 272 // commands, but the I'th load command, of type |type|, extends beyond 273 // the end of the load command region, as given by the Mach-O header. 274 // If |type| is zero, the command's type was unreadable. 275 virtual void LoadCommandsOverrun(size_t claimed, size_t i, 276 LoadCommandType type); 277 278 // The contents of the |i|'th load command, of type |type|, extend beyond 279 // the size given in the load command's header. 280 virtual void LoadCommandTooShort(size_t i, LoadCommandType type); 281 282 // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named 283 // |name| is too short to hold the sections that its header says it does. 
284 // (This more specific than LoadCommandTooShort.) 285 virtual void SectionsMissing(const string &name); 286 287 // The segment named |name| claims that its contents lie beyond the end 288 // of the file. 289 virtual void MisplacedSegmentData(const string &name); 290 291 // The section named |section| in the segment named |segment| claims that 292 // its contents do not lie entirely within the segment. 293 virtual void MisplacedSectionData(const string §ion, 294 const string &segment); 295 296 // The LC_SYMTAB command claims that symbol table contents are located 297 // beyond the end of the file. 298 virtual void MisplacedSymbolTable(); 299 300 // An attempt was made to read a Mach-O file of the unsupported 301 // CPU architecture |cpu_type|. 302 virtual void UnsupportedCPUType(cpu_type_t cpu_type); 303 304 private: 305 string filename_; 306 }; 307 308 // A handler for sections parsed from a segment. The WalkSegmentSections 309 // member function accepts an instance of this class, and applies it to 310 // each section defined in a given segment. 311 class SectionHandler { 312 public: ~SectionHandler()313 virtual ~SectionHandler() { } 314 315 // Called to report that the segment's section list contains |section|. 316 // This should return true if the iteration should continue, or false 317 // if it should stop. 318 virtual bool HandleSection(const Section §ion) = 0; 319 }; 320 321 // A handler for the load commands in a Mach-O file. 322 class LoadCommandHandler { 323 public: LoadCommandHandler()324 LoadCommandHandler() { } ~LoadCommandHandler()325 virtual ~LoadCommandHandler() { } 326 327 // When called from WalkLoadCommands, the following handler functions 328 // should return true if they wish to continue iterating over the load 329 // command list, or false if they wish to stop iterating. 330 // 331 // When called from LoadCommandIterator::Handle or Reader::Handle, 332 // these functions' return values are simply passed through to Handle's 333 // caller. 
334 // 335 // The definitions provided by this base class simply return true; the 336 // default is to silently ignore sections whose member functions the 337 // subclass doesn't override. 338 339 // COMMAND is load command we don't recognize. We provide only the 340 // command type and a ByteBuffer enclosing the command's data (If we 341 // cannot parse the command type or its size, we call 342 // reporter_->IncompleteLoadCommand instead.) UnknownCommand(LoadCommandType type,const ByteBuffer & contents)343 virtual bool UnknownCommand(LoadCommandType type, 344 const ByteBuffer &contents) { 345 return true; 346 } 347 348 // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment 349 // with the properties given in |segment|. SegmentCommand(const Segment & segment)350 virtual bool SegmentCommand(const Segment &segment) { 351 return true; 352 } 353 354 // The load command is LC_SYMTAB. |entries| holds the array of nlist 355 // entries, and |names| holds the strings the entries refer to. SymtabCommand(const ByteBuffer & entries,const ByteBuffer & names)356 virtual bool SymtabCommand(const ByteBuffer &entries, 357 const ByteBuffer &names) { 358 return true; 359 } 360 361 // Add handler functions for more load commands here as needed. 362 }; 363 364 // Create a Mach-O file reader that reports problems to |reporter|. Reader(Reporter * reporter)365 explicit Reader(Reporter *reporter) 366 : reporter_(reporter) { } 367 368 // Read the given data as a Mach-O file. The reader retains pointers 369 // into the data passed, so the data should live as long as the reader 370 // does. On success, return true; on failure, return false. 371 // 372 // At most one of these functions should be invoked once on each Reader 373 // instance. 
374 bool Read(const uint8_t *buffer, 375 size_t size, 376 cpu_type_t expected_cpu_type, 377 cpu_subtype_t expected_cpu_subtype); Read(const ByteBuffer & buffer,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)378 bool Read(const ByteBuffer &buffer, 379 cpu_type_t expected_cpu_type, 380 cpu_subtype_t expected_cpu_subtype) { 381 return Read(buffer.start, 382 buffer.Size(), 383 expected_cpu_type, 384 expected_cpu_subtype); 385 } 386 387 // Return this file's characteristics, as found in the Mach-O header. cpu_type()388 cpu_type_t cpu_type() const { return cpu_type_; } cpu_subtype()389 cpu_subtype_t cpu_subtype() const { return cpu_subtype_; } file_type()390 FileType file_type() const { return file_type_; } flags()391 FileFlags flags() const { return flags_; } 392 393 // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit 394 // Mach-O file. bits_64()395 bool bits_64() const { return bits_64_; } 396 397 // Return true if this is a big-endian Mach-O file, false if it is 398 // little-endian. big_endian()399 bool big_endian() const { return big_endian_; } 400 401 // Apply |handler| to each load command in this Mach-O file, stopping when 402 // a handler function returns false. If we encounter a malformed load 403 // command, report it via reporter_ and return false. Return true if all 404 // load commands were parseable and all handlers returned true. 405 bool WalkLoadCommands(LoadCommandHandler *handler) const; 406 407 // Set |segment| to describe the segment named |name|, if present. If 408 // found, |segment|'s byte buffers refer to a subregion of the bytes 409 // passed to Read. If we find the section, return true; otherwise, 410 // return false. 411 bool FindSegment(const string &name, Segment *segment) const; 412 413 // Apply |handler| to each section defined in |segment|. If |handler| returns 414 // false, stop iterating and return false. 
If all calls to |handler| return 415 // true and we reach the end of the section list, return true. 416 bool WalkSegmentSections(const Segment &segment, SectionHandler *handler) 417 const; 418 419 // Clear |section_map| and then populate it with a map of the sections 420 // in |segment|, from section names to Section structures. 421 // Each Section's contents refer to bytes in |segment|'s contents. 422 // On success, return true; if a problem occurs, report it and return false. 423 bool MapSegmentSections(const Segment &segment, SectionMap *section_map) 424 const; 425 426 private: 427 // Used internally. 428 class SegmentFinder; 429 class SectionMapper; 430 431 // We use this to report problems parsing the file's contents. (WEAK) 432 Reporter *reporter_; 433 434 // The contents of the Mach-O file we're parsing. We do not own the 435 // storage it refers to. 436 ByteBuffer buffer_; 437 438 // True if this file is big-endian. 439 bool big_endian_; 440 441 // True if this file is a 64-bit Mach-O file. 442 bool bits_64_; 443 444 // This file's cpu type and subtype. 445 cpu_type_t cpu_type_; // mach_header[_64].cputype 446 cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype 447 448 // This file's type. 449 FileType file_type_; // mach_header[_64].filetype 450 451 // The region of buffer_ occupied by load commands. 452 ByteBuffer load_commands_; 453 454 // The number of load commands in load_commands_. 455 uint32_t load_command_count_; // mach_header[_64].ncmds 456 457 // This file's header flags. 458 FileFlags flags_; 459 }; 460 461 } // namespace mach_o 462 } // namespace google_breakpad 463 464 #endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_ 465