1 // Copyright (c) 2007, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "client/mac/handler/dynamic_images.h"
31
32 extern "C" { // needed to compile on Leopard
33 #include <mach-o/nlist.h>
34 #include <stdlib.h>
35 #include <stdio.h>
36 }
37
38 #include <assert.h>
39 #include <AvailabilityMacros.h>
40 #include <dlfcn.h>
41 #include <mach/task_info.h>
42 #include <sys/sysctl.h>
43 #include <TargetConditionals.h>
44 #include <unistd.h>
45
46 #include <algorithm>
47 #include <string>
48 #include <vector>
49
50 #include "breakpad_nlist_64.h"
51
52 #if !TARGET_OS_IPHONE
53 #include <CoreServices/CoreServices.h>
54
55 #ifndef MAC_OS_X_VERSION_10_6
56 #define MAC_OS_X_VERSION_10_6 1060
57 #endif
58
59 #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
60
61 // Fallback declarations for TASK_DYLD_INFO and friends, introduced in
62 // <mach/task_info.h> in the Mac OS X 10.6 SDK.
63 #define TASK_DYLD_INFO 17
64 struct task_dyld_info {
65 mach_vm_address_t all_image_info_addr;
66 mach_vm_size_t all_image_info_size;
67 };
68 typedef struct task_dyld_info task_dyld_info_data_t;
69 typedef struct task_dyld_info *task_dyld_info_t;
70 #define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))
71
72 #endif
73
74 #endif // !TARGET_OS_IPHONE
75
76 namespace google_breakpad {
77
78 using std::string;
79 using std::vector;
80
81 //==============================================================================
82 // Returns the size of the memory region containing |address| and the
83 // number of bytes from |address| to the end of the region.
84 // We potentially, will extend the size of the original
85 // region by the size of the following region if it's contiguous with the
86 // first in order to handle cases when we're reading strings and they
87 // straddle two vm regions.
88 //
GetMemoryRegionSize(task_port_t target_task,const uint64_t address,mach_vm_size_t * size_to_end)89 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
90 const uint64_t address,
91 mach_vm_size_t *size_to_end) {
92 mach_vm_address_t region_base = (mach_vm_address_t)address;
93 mach_vm_size_t region_size;
94 natural_t nesting_level = 0;
95 vm_region_submap_info_64 submap_info;
96 mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
97
98 // Get information about the vm region containing |address|
99 vm_region_recurse_info_t region_info;
100 region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
101
102 kern_return_t result =
103 mach_vm_region_recurse(target_task,
104 ®ion_base,
105 ®ion_size,
106 &nesting_level,
107 region_info,
108 &info_count);
109
110 if (result == KERN_SUCCESS) {
111 // Get distance from |address| to the end of this region
112 *size_to_end = region_base + region_size -(mach_vm_address_t)address;
113
114 // If we want to handle strings as long as 4096 characters we may need
115 // to check if there's a vm region immediately following the first one.
116 // If so, we need to extend |*size_to_end| to go all the way to the end
117 // of the second region.
118 if (*size_to_end < 4096) {
119 // Second region starts where the first one ends
120 mach_vm_address_t region_base2 =
121 (mach_vm_address_t)(region_base + region_size);
122 mach_vm_size_t region_size2;
123
124 // Get information about the following vm region
125 result =
126 mach_vm_region_recurse(target_task,
127 ®ion_base2,
128 ®ion_size2,
129 &nesting_level,
130 region_info,
131 &info_count);
132
133 // Extend region_size to go all the way to the end of the 2nd region
134 if (result == KERN_SUCCESS
135 && region_base2 == region_base + region_size) {
136 region_size += region_size2;
137 }
138 }
139
140 *size_to_end = region_base + region_size -(mach_vm_address_t)address;
141 } else {
142 region_size = 0;
143 *size_to_end = 0;
144 }
145
146 return region_size;
147 }
148
149 #define kMaxStringLength 8192
150 //==============================================================================
151 // Reads a NULL-terminated string from another task.
152 //
153 // Warning! This will not read any strings longer than kMaxStringLength-1
154 //
ReadTaskString(task_port_t target_task,const uint64_t address)155 static string ReadTaskString(task_port_t target_task,
156 const uint64_t address) {
157 // The problem is we don't know how much to read until we know how long
158 // the string is. And we don't know how long the string is, until we've read
159 // the memory! So, we'll try to read kMaxStringLength bytes
160 // (or as many bytes as we can until we reach the end of the vm region).
161 mach_vm_size_t size_to_end;
162 GetMemoryRegionSize(target_task, address, &size_to_end);
163
164 if (size_to_end > 0) {
165 mach_vm_size_t size_to_read =
166 size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
167
168 vector<uint8_t> bytes;
169 if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) !=
170 KERN_SUCCESS)
171 return string();
172
173 return string(reinterpret_cast<const char*>(&bytes[0]));
174 }
175
176 return string();
177 }
178
179 //==============================================================================
180 // Reads an address range from another task. The bytes read will be returned
181 // in bytes, which will be resized as necessary.
ReadTaskMemory(task_port_t target_task,const uint64_t address,size_t length,vector<uint8_t> & bytes)182 kern_return_t ReadTaskMemory(task_port_t target_task,
183 const uint64_t address,
184 size_t length,
185 vector<uint8_t> &bytes) {
186 int systemPageSize = getpagesize();
187
188 // use the negative of the page size for the mask to find the page address
189 mach_vm_address_t page_address = address & (-systemPageSize);
190
191 mach_vm_address_t last_page_address =
192 (address + length + (systemPageSize - 1)) & (-systemPageSize);
193
194 mach_vm_size_t page_size = last_page_address - page_address;
195 uint8_t* local_start;
196 uint32_t local_length;
197
198 kern_return_t r = mach_vm_read(target_task,
199 page_address,
200 page_size,
201 reinterpret_cast<vm_offset_t*>(&local_start),
202 &local_length);
203
204 if (r != KERN_SUCCESS)
205 return r;
206
207 bytes.resize(length);
208 memcpy(&bytes[0],
209 &local_start[(mach_vm_address_t)address - page_address],
210 length);
211 mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
212 return KERN_SUCCESS;
213 }
214
215 #pragma mark -
216
217 //==============================================================================
218 // Traits structs for specializing function templates to handle
219 // 32-bit/64-bit Mach-O files.
220 struct MachO32 {
221 typedef mach_header mach_header_type;
222 typedef segment_command mach_segment_command_type;
223 typedef dyld_image_info32 dyld_image_info;
224 typedef dyld_all_image_infos32 dyld_all_image_infos;
225 typedef struct nlist nlist_type;
226 static const uint32_t magic = MH_MAGIC;
227 static const uint32_t segment_load_command = LC_SEGMENT;
228 };
229
230 struct MachO64 {
231 typedef mach_header_64 mach_header_type;
232 typedef segment_command_64 mach_segment_command_type;
233 typedef dyld_image_info64 dyld_image_info;
234 typedef dyld_all_image_infos64 dyld_all_image_infos;
235 typedef struct nlist_64 nlist_type;
236 static const uint32_t magic = MH_MAGIC_64;
237 static const uint32_t segment_load_command = LC_SEGMENT_64;
238 };
239
240 template<typename MachBits>
FindTextSection(DynamicImage & image)241 bool FindTextSection(DynamicImage& image) {
242 typedef typename MachBits::mach_header_type mach_header_type;
243 typedef typename MachBits::mach_segment_command_type
244 mach_segment_command_type;
245
246 const mach_header_type* header =
247 reinterpret_cast<const mach_header_type*>(&image.header_[0]);
248
249 if(header->magic != MachBits::magic) {
250 return false;
251 }
252
253 const struct load_command *cmd =
254 reinterpret_cast<const struct load_command *>(header + 1);
255
256 bool found_text_section = false;
257 bool found_dylib_id_command = false;
258 for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
259 if (!found_text_section) {
260 if (cmd->cmd == MachBits::segment_load_command) {
261 const mach_segment_command_type *seg =
262 reinterpret_cast<const mach_segment_command_type *>(cmd);
263
264 if (!strcmp(seg->segname, "__TEXT")) {
265 image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr);
266 image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize);
267 image.slide_ = 0;
268
269 if (seg->fileoff == 0 && seg->filesize != 0) {
270 image.slide_ =
271 (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr;
272 }
273 found_text_section = true;
274 }
275 }
276 }
277
278 if (!found_dylib_id_command) {
279 if (cmd->cmd == LC_ID_DYLIB) {
280 const struct dylib_command *dc =
281 reinterpret_cast<const struct dylib_command *>(cmd);
282
283 image.version_ = dc->dylib.current_version;
284 found_dylib_id_command = true;
285 }
286 }
287
288 if (found_dylib_id_command && found_text_section) {
289 return true;
290 }
291
292 cmd = reinterpret_cast<const struct load_command *>
293 (reinterpret_cast<const char *>(cmd) + cmd->cmdsize);
294 }
295
296 return false;
297 }
298
299 //==============================================================================
300 // Initializes vmaddr_, vmsize_, and slide_
CalculateMemoryAndVersionInfo()301 void DynamicImage::CalculateMemoryAndVersionInfo() {
302 // unless we can process the header, ensure that calls to
303 // IsValid() will return false
304 vmaddr_ = 0;
305 vmsize_ = 0;
306 slide_ = 0;
307 version_ = 0;
308
309 // The function template above does all the real work.
310 if (Is64Bit())
311 FindTextSection<MachO64>(*this);
312 else
313 FindTextSection<MachO32>(*this);
314 }
315
316 //==============================================================================
317 // The helper function template abstracts the 32/64-bit differences.
318 template<typename MachBits>
GetFileTypeFromHeader(DynamicImage & image)319 uint32_t GetFileTypeFromHeader(DynamicImage& image) {
320 typedef typename MachBits::mach_header_type mach_header_type;
321
322 const mach_header_type* header =
323 reinterpret_cast<const mach_header_type*>(&image.header_[0]);
324 return header->filetype;
325 }
326
GetFileType()327 uint32_t DynamicImage::GetFileType() {
328 if (Is64Bit())
329 return GetFileTypeFromHeader<MachO64>(*this);
330
331 return GetFileTypeFromHeader<MachO32>(*this);
332 }
333
334 #pragma mark -
335
336 //==============================================================================
337 // Loads information about dynamically loaded code in the given task.
DynamicImages(mach_port_t task)338 DynamicImages::DynamicImages(mach_port_t task)
339 : task_(task),
340 cpu_type_(DetermineTaskCPUType(task)),
341 image_list_() {
342 ReadImageInfoForTask();
343 }
344
345 template<typename MachBits>
LookupSymbol(const char * symbol_name,const char * filename,cpu_type_t cpu_type)346 static uint64_t LookupSymbol(const char* symbol_name,
347 const char* filename,
348 cpu_type_t cpu_type) {
349 typedef typename MachBits::nlist_type nlist_type;
350
351 nlist_type symbol_info[8] = {};
352 const char *symbolNames[2] = { symbol_name, "\0" };
353 nlist_type &list = symbol_info[0];
354 int invalidEntriesCount = breakpad_nlist(filename,
355 &list,
356 symbolNames,
357 cpu_type);
358
359 if(invalidEntriesCount != 0) {
360 return 0;
361 }
362
363 assert(list.n_value);
364 return list.n_value;
365 }
366
367 #if TARGET_OS_IPHONE
// iOS build: the TASK_DYLD_INFO path is always taken.
static bool HasTaskDyldInfo() {
  return true;
}
371 #else
GetOSVersionInternal()372 static SInt32 GetOSVersionInternal() {
373 SInt32 os_version = 0;
374 Gestalt(gestaltSystemVersion, &os_version);
375 return os_version;
376 }
377
GetOSVersion()378 static SInt32 GetOSVersion() {
379 static SInt32 os_version = GetOSVersionInternal();
380 return os_version;
381 }
382
// Tells whether the TASK_DYLD_INFO path should be used.  When the deployment
// target is below 10.6 the running OS version is compared against 0x1060;
// otherwise the answer is always true.
static bool HasTaskDyldInfo() {
#if MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_6
  return GetOSVersion() >= 0x1060;
#else
  return true;
#endif
}
390 #endif // TARGET_OS_IPHONE
391
GetDyldAllImageInfosPointer()392 uint64_t DynamicImages::GetDyldAllImageInfosPointer() {
393 if (HasTaskDyldInfo()) {
394 task_dyld_info_data_t task_dyld_info;
395 mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
396 if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info,
397 &count) != KERN_SUCCESS) {
398 return 0;
399 }
400
401 return (uint64_t)task_dyld_info.all_image_info_addr;
402 } else {
403 const char *imageSymbolName = "_dyld_all_image_infos";
404 const char *dyldPath = "/usr/lib/dyld";
405
406 if (Is64Bit())
407 return LookupSymbol<MachO64>(imageSymbolName, dyldPath, cpu_type_);
408 return LookupSymbol<MachO32>(imageSymbolName, dyldPath, cpu_type_);
409 }
410 }
411
412 //==============================================================================
413 // This code was written using dyld_debug.c (from Darwin) as a guide.
414
415 template<typename MachBits>
ReadImageInfo(DynamicImages & images,uint64_t image_list_address)416 void ReadImageInfo(DynamicImages& images,
417 uint64_t image_list_address) {
418 typedef typename MachBits::dyld_image_info dyld_image_info;
419 typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos;
420 typedef typename MachBits::mach_header_type mach_header_type;
421
422 // Read the structure inside of dyld that contains information about
423 // loaded images. We're reading from the desired task's address space.
424
425 // Here we make the assumption that dyld loaded at the same address in
426 // the crashed process vs. this one. This is an assumption made in
427 // "dyld_debug.c" and is said to be nearly always valid.
428 vector<uint8_t> dyld_all_info_bytes;
429 if (ReadTaskMemory(images.task_,
430 image_list_address,
431 sizeof(dyld_all_image_infos),
432 dyld_all_info_bytes) != KERN_SUCCESS)
433 return;
434
435 dyld_all_image_infos *dyldInfo =
436 reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]);
437
438 // number of loaded images
439 int count = dyldInfo->infoArrayCount;
440
441 // Read an array of dyld_image_info structures each containing
442 // information about a loaded image.
443 vector<uint8_t> dyld_info_array_bytes;
444 if (ReadTaskMemory(images.task_,
445 dyldInfo->infoArray,
446 count * sizeof(dyld_image_info),
447 dyld_info_array_bytes) != KERN_SUCCESS)
448 return;
449
450 dyld_image_info *infoArray =
451 reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]);
452 images.image_list_.reserve(count);
453
454 for (int i = 0; i < count; ++i) {
455 dyld_image_info &info = infoArray[i];
456
457 // First read just the mach_header from the image in the task.
458 vector<uint8_t> mach_header_bytes;
459 if (ReadTaskMemory(images.task_,
460 info.load_address_,
461 sizeof(mach_header_type),
462 mach_header_bytes) != KERN_SUCCESS)
463 continue; // bail on this dynamic image
464
465 mach_header_type *header =
466 reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]);
467
468 // Now determine the total amount necessary to read the header
469 // plus all of the load commands.
470 size_t header_size =
471 sizeof(mach_header_type) + header->sizeofcmds;
472
473 if (ReadTaskMemory(images.task_,
474 info.load_address_,
475 header_size,
476 mach_header_bytes) != KERN_SUCCESS)
477 continue;
478
479 // Read the file name from the task's memory space.
480 string file_path;
481 if (info.file_path_) {
482 // Although we're reading kMaxStringLength bytes, it's copied in the
483 // the DynamicImage constructor below with the correct string length,
484 // so it's not really wasting memory.
485 file_path = ReadTaskString(images.task_, info.file_path_);
486 }
487
488 // Create an object representing this image and add it to our list.
489 DynamicImage *new_image;
490 new_image = new DynamicImage(&mach_header_bytes[0],
491 header_size,
492 info.load_address_,
493 file_path,
494 static_cast<uintptr_t>(info.file_mod_date_),
495 images.task_,
496 images.cpu_type_);
497
498 if (new_image->IsValid()) {
499 images.image_list_.push_back(DynamicImageRef(new_image));
500 } else {
501 delete new_image;
502 }
503 }
504
505 // sorts based on loading address
506 sort(images.image_list_.begin(), images.image_list_.end());
507 // remove duplicates - this happens in certain strange cases
508 // You can see it in DashboardClient when Google Gadgets plugin
509 // is installed. Apple's crash reporter log and gdb "info shared"
510 // both show the same library multiple times at the same address
511
512 vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(),
513 images.image_list_.end());
514 images.image_list_.erase(it, images.image_list_.end());
515 }
516
ReadImageInfoForTask()517 void DynamicImages::ReadImageInfoForTask() {
518 uint64_t imageList = GetDyldAllImageInfosPointer();
519
520 if (imageList) {
521 if (Is64Bit())
522 ReadImageInfo<MachO64>(*this, imageList);
523 else
524 ReadImageInfo<MachO32>(*this, imageList);
525 }
526 }
527
528 //==============================================================================
GetExecutableImage()529 DynamicImage *DynamicImages::GetExecutableImage() {
530 int executable_index = GetExecutableImageIndex();
531
532 if (executable_index >= 0) {
533 return GetImage(executable_index);
534 }
535
536 return NULL;
537 }
538
539 //==============================================================================
540 // returns -1 if failure to find executable
GetExecutableImageIndex()541 int DynamicImages::GetExecutableImageIndex() {
542 int image_count = GetImageCount();
543
544 for (int i = 0; i < image_count; ++i) {
545 DynamicImage *image = GetImage(i);
546 if (image->GetFileType() == MH_EXECUTE) {
547 return i;
548 }
549 }
550
551 return -1;
552 }
553
554 //==============================================================================
555 // static
DetermineTaskCPUType(task_t task)556 cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) {
557 if (task == mach_task_self())
558 return GetNativeCPUType();
559
560 int mib[CTL_MAXNAME];
561 size_t mibLen = CTL_MAXNAME;
562 int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen);
563 if (err == 0) {
564 assert(mibLen < CTL_MAXNAME);
565 pid_for_task(task, &mib[mibLen]);
566 mibLen += 1;
567
568 cpu_type_t cpu_type;
569 size_t cpuTypeSize = sizeof(cpu_type);
570 sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0);
571 return cpu_type;
572 }
573
574 return GetNativeCPUType();
575 }
576
577 } // namespace google_breakpad
578