1 // Copyright 2014 Renato Tegon Forti, Antony Polukhin. 2 // Copyright 2015-2020 Antony Polukhin. 3 // 4 // Distributed under the Boost Software License, Version 1.0. 5 // (See accompanying file LICENSE_1_0.txt 6 // or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_DLL_DETAIL_MACHO_INFO_HPP 9 #define BOOST_DLL_DETAIL_MACHO_INFO_HPP 10 11 #include <boost/dll/config.hpp> 12 13 #ifdef BOOST_HAS_PRAGMA_ONCE 14 # pragma once 15 #endif 16 17 #include <algorithm> 18 #include <fstream> 19 #include <string> // for std::getline 20 21 #include <boost/cstdint.hpp> 22 23 namespace boost { namespace dll { namespace detail { 24 25 typedef int integer_t; 26 typedef int vm_prot_t; 27 typedef integer_t cpu_type_t; 28 typedef integer_t cpu_subtype_t; 29 30 template <class AddressOffsetT> 31 struct mach_header_template { 32 boost::uint32_t magic; 33 cpu_type_t cputype; 34 cpu_subtype_t cpusubtype; 35 boost::uint32_t filetype; 36 boost::uint32_t ncmds; 37 boost::uint32_t sizeofcmds; 38 boost::uint32_t flags[sizeof(AddressOffsetT) / sizeof(uint32_t)]; // Flags and reserved 39 }; 40 41 typedef mach_header_template<boost::uint32_t> mach_header_32_; 42 typedef mach_header_template<boost::uint64_t> mach_header_64_; 43 44 struct load_command_ { 45 boost::uint32_t cmd; /* type of command */ 46 boost::uint32_t cmdsize; 47 }; 48 49 struct load_command_types { 50 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SEGMENT_ = 0x1); /* segment of this file to be mapped */ 51 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SYMTAB_ = 0x2); /* link-edit stab symbol table info */ 52 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SYMSEG_ = 0x3); /* link-edit gdb symbol table info (obsolete) */ 53 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_THREAD_ = 0x4); /* thread */ 54 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_UNIXTHREAD_ = 0x5); /* unix thread (includes a stack) */ 55 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOADFVMLIB_ = 0x6); /* load a specified fixed VM shared library */ 56 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_IDFVMLIB_ = 0x7); /* fixed VM shared library identification */ 57 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_IDENT_ = 0x8); /* object identification info (obsolete) */ 58 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_FVMFILE_ = 0x9); /* fixed VM file inclusion (internal use) */ 59 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_PREPAGE_ = 0xa); /* prepage command (internal use) */ 60 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_DYSYMTAB_ = 0xb); /* dynamic link-edit symbol table info */ 61 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOAD_DYLIB_ = 0xc); /* load a dynamically linked shared library */ 62 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ID_DYLIB_ = 0xd); /* dynamically linked shared lib ident */ 63 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOAD_DYLINKER_ = 0xe); /* load a dynamic linker */ 64 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ID_DYLINKER_ = 0xf); /* dynamic linker identification */ 65 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_PREBOUND_DYLIB_ = 0x10); /* modules prebound for a dynamically linked shared library */ 66 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ROUTINES_ = 0x11); /* image routines */ 67 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_FRAMEWORK_ = 0x12); /* sub framework */ 68 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_UMBRELLA_ = 0x13); /* sub umbrella */ 69 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_CLIENT_ = 0x14); /* sub client */ 70 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_LIBRARY_ = 0x15); /* sub library */ 71 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_TWOLEVEL_HINTS_ = 0x16); /* two-level namespace lookup hints */ 72 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_PREBIND_CKSUM_ = 0x17); /* prebind checksum */ 73 /* 74 * After MacOS X 10.1 when a new load command is added that is required to be 75 * understood by the dynamic linker for the image to execute properly the 76 * LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic 77 * linker sees such a load command it it does not understand will issue a 78 * "unknown load command required for execution" error and refuse to use the 79 * image. Other load commands without this bit that are not understood will 80 * simply be ignored. 81 */ 82 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_REQ_DYLD_ = 0x80000000); 83 84 /* 85 * load a dynamically linked shared library that is allowed to be missing 86 * (all symbols are weak imported). 87 */ 88 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOAD_WEAK_DYLIB_ = (0x18 | LC_REQ_DYLD_)); 89 90 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SEGMENT_64_ = 0x19); /* 64-bit segment of this file to be mapped */ 91 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ROUTINES_64_ = 0x1a); /* 64-bit image routines */ 92 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_UUID_ = 0x1b); /* the uuid */ 93 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_RPATH_ = (0x1c | LC_REQ_DYLD_)); /* runpath additions */ 94 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_CODE_SIGNATURE_ = 0x1d); /* local of code signature */ 95 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SEGMENT_SPLIT_INFO_= 0x1e); /* local of info to split segments */ 96 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_REEXPORT_DYLIB_ = (0x1f | LC_REQ_DYLD_)); /* load and re-export dylib */ 97 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LAZY_LOAD_DYLIB_ = 0x20); /* delay load of dylib until first use */ 98 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ENCRYPTION_INFO_ = 0x21); /* encrypted segment information */ 99 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_DYLD_INFO_ = 0x22); /* compressed dyld information */ 100 BOOST_STATIC_CONSTANT(boost::uint32_t, LC_DYLD_INFO_ONLY_ = (0x22|LC_REQ_DYLD_)); /* compressed dyld information only */ 101 }; 102 103 template <class AddressOffsetT> 104 struct segment_command_template { 105 boost::uint32_t cmd; /* LC_SEGMENT_ */ 106 boost::uint32_t cmdsize; /* includes sizeof section structs */ 107 char segname[16]; /* segment name */ 108 AddressOffsetT vmaddr; /* memory address of this segment */ 109 AddressOffsetT vmsize; /* memory size of this segment */ 110 AddressOffsetT fileoff; /* file offset of this segment */ 111 AddressOffsetT filesize; /* amount to map from the file */ 112 vm_prot_t maxprot; /* maximum VM protection */ 113 vm_prot_t initprot; /* initial VM protection */ 114 boost::uint32_t nsects; /* number of sections in segment */ 115 boost::uint32_t flags; /* flags */ 116 }; 117 118 typedef segment_command_template<boost::uint32_t> segment_command_32_; 119 typedef segment_command_template<boost::uint64_t> segment_command_64_; 120 121 template <class AddressOffsetT> 122 struct section_template { 123 char sectname[16]; /* name of this section */ 124 char segname[16]; /* segment this section goes in */ 125 AddressOffsetT addr; /* memory address of this section */ 126 AddressOffsetT size; /* size in bytes of this section */ 127 boost::uint32_t offset; /* file offset of this section */ 128 boost::uint32_t align; /* section alignment (power of 2) */ 129 boost::uint32_t reloff; /* file offset of relocation entries */ 130 boost::uint32_t nreloc; /* number of relocation entries */ 131 boost::uint32_t flags; /* flags (section type and attributes)*/ 132 boost::uint32_t reserved[1 + sizeof(AddressOffsetT) / sizeof(uint32_t)]; 133 }; 134 135 typedef section_template<boost::uint32_t> section_32_; 136 typedef section_template<boost::uint64_t> section_64_; 137 138 struct symtab_command_ { 139 boost::uint32_t cmd; /* LC_SYMTAB_ */ 140 boost::uint32_t cmdsize; /* sizeof(struct symtab_command) */ 141 boost::uint32_t symoff; /* symbol table offset */ 142 boost::uint32_t nsyms; /* number of symbol table entries */ 143 boost::uint32_t stroff; /* string table offset */ 144 boost::uint32_t strsize; /* string table size in bytes */ 145 }; 146 147 template <class AddressOffsetT> 148 struct nlist_template { 149 boost::uint32_t n_strx; 150 boost::uint8_t n_type; 151 boost::uint8_t n_sect; 152 boost::uint16_t n_desc; 153 AddressOffsetT n_value; 154 }; 155 156 typedef nlist_template<boost::uint32_t> nlist_32_; 157 typedef nlist_template<boost::uint64_t> nlist_64_; 158 159 template <class AddressOffsetT> 160 class macho_info { 161 typedef boost::dll::detail::mach_header_template<AddressOffsetT> header_t; 162 typedef boost::dll::detail::load_command_ load_command_t; 163 typedef boost::dll::detail::segment_command_template<AddressOffsetT> segment_t; 164 typedef boost::dll::detail::section_template<AddressOffsetT> section_t; 165 typedef boost::dll::detail::symtab_command_ symbol_header_t; 166 typedef boost::dll::detail::nlist_template<AddressOffsetT> nlist_t; 167 168 BOOST_STATIC_CONSTANT(boost::uint32_t, SEGMENT_CMD_NUMBER = (sizeof(AddressOffsetT) > 4 ? load_command_types::LC_SEGMENT_64_ : load_command_types::LC_SEGMENT_)); 169 170 public: parsing_supported(std::ifstream & fs)171 static bool parsing_supported(std::ifstream& fs) { 172 static const uint32_t magic_bytes = (sizeof(AddressOffsetT) <= sizeof(uint32_t) ? 0xfeedface : 0xfeedfacf); 173 174 uint32_t magic; 175 fs.seekg(0); 176 fs.read(reinterpret_cast<char*>(&magic), sizeof(magic)); 177 return (magic_bytes == magic); 178 } 179 180 private: 181 template <class T> read_raw(std::ifstream & fs,T & value,std::size_t size=sizeof (T))182 static void read_raw(std::ifstream& fs, T& value, std::size_t size = sizeof(T)) { 183 fs.read(reinterpret_cast<char*>(&value), size); 184 } 185 186 template <class F> command_finder(std::ifstream & fs,uint32_t cmd_num,F callback_f)187 static void command_finder(std::ifstream& fs, uint32_t cmd_num, F callback_f) { 188 const header_t h = header(fs); 189 load_command_t command; 190 fs.seekg(sizeof(header_t)); 191 for (std::size_t i = 0; i < h.ncmds; ++i) { 192 const std::ifstream::pos_type pos = fs.tellg(); 193 read_raw(fs, command); 194 if (command.cmd != cmd_num) { 195 fs.seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize)); 196 continue; 197 } 198 199 fs.seekg(pos); 200 callback_f(fs); 201 fs.seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize)); 202 } 203 } 204 205 struct section_names_gather { 206 std::vector<std::string>& ret; 207 operator ()boost::dll::detail::macho_info::section_names_gather208 void operator()(std::ifstream& fs) const { 209 segment_t segment; 210 read_raw(fs, segment); 211 212 section_t section; 213 ret.reserve(ret.size() + segment.nsects); 214 for (std::size_t j = 0; j < segment.nsects; ++j) { 215 read_raw(fs, section); 216 // `segname` goes right after the `sectname`. 217 // Forcing `sectname` to end on '\0' 218 section.segname[0] = '\0'; 219 ret.push_back(section.sectname); 220 if (ret.back().empty()) { 221 ret.pop_back(); // Do not show empty names 222 } 223 } 224 } 225 }; 226 227 struct symbol_names_gather { 228 std::vector<std::string>& ret; 229 std::size_t section_index; 230 operator ()boost::dll::detail::macho_info::symbol_names_gather231 void operator()(std::ifstream& fs) const { 232 symbol_header_t symbh; 233 read_raw(fs, symbh); 234 ret.reserve(ret.size() + symbh.nsyms); 235 236 nlist_t symbol; 237 std::string symbol_name; 238 for (std::size_t j = 0; j < symbh.nsyms; ++j) { 239 fs.seekg(symbh.symoff + j * sizeof(nlist_t)); 240 read_raw(fs, symbol); 241 if (!symbol.n_strx) { 242 continue; // Symbol has no name 243 } 244 245 if ((symbol.n_type & 0x0e) != 0xe || !symbol.n_sect) { 246 continue; // Symbol has no section 247 } 248 249 if (section_index && section_index != symbol.n_sect) { 250 continue; // Not in the required section 251 } 252 253 fs.seekg(symbh.stroff + symbol.n_strx); 254 std::getline(fs, symbol_name, '\0'); 255 if (symbol_name.empty()) { 256 continue; 257 } 258 259 if (symbol_name[0] == '_') { 260 // Linker adds additional '_' symbol. Could not find official docs for that case. 261 ret.push_back(symbol_name.c_str() + 1); 262 } else { 263 ret.push_back(symbol_name); 264 } 265 } 266 } 267 }; 268 269 public: sections(std::ifstream & fs)270 static std::vector<std::string> sections(std::ifstream& fs) { 271 std::vector<std::string> ret; 272 section_names_gather f = { ret }; 273 command_finder(fs, SEGMENT_CMD_NUMBER, f); 274 return ret; 275 } 276 277 private: header(std::ifstream & fs)278 static header_t header(std::ifstream& fs) { 279 header_t h; 280 281 fs.seekg(0); 282 read_raw(fs, h); 283 284 return h; 285 } 286 287 public: symbols(std::ifstream & fs)288 static std::vector<std::string> symbols(std::ifstream& fs) { 289 std::vector<std::string> ret; 290 symbol_names_gather f = { ret, 0 }; 291 command_finder(fs, load_command_types::LC_SYMTAB_, f); 292 return ret; 293 } 294 symbols(std::ifstream & fs,const char * section_name)295 static std::vector<std::string> symbols(std::ifstream& fs, const char* section_name) { 296 // Not very optimal solution 297 std::vector<std::string> ret = sections(fs); 298 std::vector<std::string>::iterator it = std::find(ret.begin(), ret.end(), section_name); 299 if (it == ret.end()) { 300 // No section with such name 301 ret.clear(); 302 return ret; 303 } 304 305 // section indexes start from 1 306 symbol_names_gather f = { ret, static_cast<std::size_t>(1 + (it - ret.begin())) }; 307 ret.clear(); 308 command_finder(fs, load_command_types::LC_SYMTAB_, f); 309 return ret; 310 } 311 }; 312 313 typedef macho_info<boost::uint32_t> macho_info32; 314 typedef macho_info<boost::uint64_t> macho_info64; 315 316 }}} // namespace boost::dll::detail 317 318 #endif // BOOST_DLL_DETAIL_MACHO_INFO_HPP 319