1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // macho_id.cc: Functions to gather identifying information from a macho file
31 //
32 // See macho_id.h for documentation
33 //
34 // Author: Dan Waylonis
35
36 extern "C" { // necessary for Leopard
37 #include <fcntl.h>
38 #include <mach-o/loader.h>
39 #include <mach-o/swap.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <sys/time.h>
44 #include <sys/types.h>
45 #include <unistd.h>
46 }
47
48 #include "common/mac/macho_id.h"
49 #include "common/mac/macho_walker.h"
50 #include "common/mac/macho_utilities.h"
51
52 namespace MacFileUtilities {
53
54 using google_breakpad::MD5Init;
55 using google_breakpad::MD5Update;
56 using google_breakpad::MD5Final;
57
MachoID(const char * path)58 MachoID::MachoID(const char *path)
59 : memory_(0),
60 memory_size_(0),
61 crc_(0),
62 md5_context_(),
63 update_function_(NULL) {
64 strlcpy(path_, path, sizeof(path_));
65 }
66
MachoID(const char * path,void * memory,size_t size)67 MachoID::MachoID(const char *path, void *memory, size_t size)
68 : memory_(memory),
69 memory_size_(size),
70 crc_(0),
71 md5_context_(),
72 update_function_(NULL) {
73 strlcpy(path_, path, sizeof(path_));
74 }
75
~MachoID()76 MachoID::~MachoID() {
77 }
78
// The checksum below follows Adler-32 as described at
// http://en.wikipedia.org/wiki/Adler-32, using the block-wise modulo
// optimization from http://www.zlib.net/

// Modulus for both running sums: the largest prime smaller than 65536.
#define MOD_ADLER 65521
// Number of bytes that may be summed between modulo reductions.  MAX_BLOCK is
// the largest n such that 255n(n+1)/2 + (n+1)(MOD_ADLER-1) <= 2^32-1, i.e. the
// longest run that cannot overflow a 32-bit accumulator.
#define MAX_BLOCK 5552
86
UpdateCRC(unsigned char * bytes,size_t size)87 void MachoID::UpdateCRC(unsigned char *bytes, size_t size) {
88 // Unrolled loops for summing
89 #define DO1(buf,i) {sum1 += (buf)[i]; sum2 += sum1;}
90 #define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
91 #define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
92 #define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
93 #define DO16(buf) DO8(buf,0); DO8(buf,8);
94 // Split up the crc
95 uint32_t sum1 = crc_ & 0xFFFF;
96 uint32_t sum2 = (crc_ >> 16) & 0xFFFF;
97
98 // Do large blocks
99 while (size >= MAX_BLOCK) {
100 size -= MAX_BLOCK;
101 int block_count = MAX_BLOCK / 16;
102 do {
103 DO16(bytes);
104 bytes += 16;
105 } while (--block_count);
106 sum1 %= MOD_ADLER;
107 sum2 %= MOD_ADLER;
108 }
109
110 // Do remaining bytes
111 if (size) {
112 while (size >= 16) {
113 size -= 16;
114 DO16(bytes);
115 bytes += 16;
116 }
117 while (size--) {
118 sum1 += *bytes++;
119 sum2 += sum1;
120 }
121 sum1 %= MOD_ADLER;
122 sum2 %= MOD_ADLER;
123 crc_ = (sum2 << 16) | sum1;
124 }
125 }
126
UpdateMD5(unsigned char * bytes,size_t size)127 void MachoID::UpdateMD5(unsigned char *bytes, size_t size) {
128 MD5Update(&md5_context_, bytes, static_cast<unsigned>(size));
129 }
130
Update(MachoWalker * walker,off_t offset,size_t size)131 void MachoID::Update(MachoWalker *walker, off_t offset, size_t size) {
132 if (!update_function_ || !size)
133 return;
134
135 // Read up to 4k bytes at a time
136 unsigned char buffer[4096];
137 size_t buffer_size;
138 off_t file_offset = offset;
139 while (size > 0) {
140 if (size > sizeof(buffer)) {
141 buffer_size = sizeof(buffer);
142 size -= buffer_size;
143 } else {
144 buffer_size = size;
145 size = 0;
146 }
147
148 if (!walker->ReadBytes(buffer, buffer_size, file_offset))
149 return;
150
151 (this->*update_function_)(buffer, buffer_size);
152 file_offset += buffer_size;
153 }
154 }
155
UUIDCommand(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char bytes[16])156 bool MachoID::UUIDCommand(cpu_type_t cpu_type,
157 cpu_subtype_t cpu_subtype,
158 unsigned char bytes[16]) {
159 struct breakpad_uuid_command uuid_cmd;
160 uuid_cmd.cmd = 0;
161 if (!WalkHeader(cpu_type, cpu_subtype, UUIDWalkerCB, &uuid_cmd))
162 return false;
163
164 // If we found the command, we'll have initialized the uuid_command
165 // structure
166 if (uuid_cmd.cmd == LC_UUID) {
167 memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid));
168 return true;
169 }
170
171 return false;
172 }
173
IDCommand(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char identifier[16])174 bool MachoID::IDCommand(cpu_type_t cpu_type,
175 cpu_subtype_t cpu_subtype,
176 unsigned char identifier[16]) {
177 struct dylib_command dylib_cmd;
178 dylib_cmd.cmd = 0;
179 if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd))
180 return false;
181
182 // If we found the command, we'll have initialized the dylib_command
183 // structure
184 if (dylib_cmd.cmd == LC_ID_DYLIB) {
185 // Take the hashed filename, version, and compatability version bytes
186 // to form the first 12 bytes, pad the rest with zeros
187
188 // create a crude hash of the filename to generate the first 4 bytes
189 identifier[0] = 0;
190 identifier[1] = 0;
191 identifier[2] = 0;
192 identifier[3] = 0;
193
194 for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) {
195 identifier[j%4] += path_[i];
196 }
197
198 identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF;
199 identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF;
200 identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF;
201 identifier[7] = dylib_cmd.dylib.current_version & 0xFF;
202 identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF;
203 identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF;
204 identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF;
205 identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF;
206 identifier[12] = (cpu_type >> 24) & 0xFF;
207 identifier[13] = (cpu_type >> 16) & 0xFF;
208 identifier[14] = (cpu_type >> 8) & 0xFF;
209 identifier[15] = cpu_type & 0xFF;
210
211 return true;
212 }
213
214 return false;
215 }
216
Adler32(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype)217 uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
218 update_function_ = &MachoID::UpdateCRC;
219 crc_ = 0;
220
221 if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
222 return 0;
223
224 return crc_;
225 }
226
MD5(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char identifier[16])227 bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) {
228 update_function_ = &MachoID::UpdateMD5;
229
230 MD5Init(&md5_context_);
231
232 if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
233 return false;
234
235 MD5Final(identifier, &md5_context_);
236 return true;
237 }
238
WalkHeader(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,MachoWalker::LoadCommandCallback callback,void * context)239 bool MachoID::WalkHeader(cpu_type_t cpu_type,
240 cpu_subtype_t cpu_subtype,
241 MachoWalker::LoadCommandCallback callback,
242 void *context) {
243 if (memory_) {
244 MachoWalker walker(memory_, memory_size_, callback, context);
245 return walker.WalkHeader(cpu_type, cpu_subtype);
246 } else {
247 MachoWalker walker(path_, callback, context);
248 return walker.WalkHeader(cpu_type, cpu_subtype);
249 }
250 }
251
252 // static
WalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)253 bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
254 bool swap, void *context) {
255 MachoID *macho_id = (MachoID *)context;
256
257 if (cmd->cmd == LC_SEGMENT) {
258 struct segment_command seg;
259
260 if (!walker->ReadBytes(&seg, sizeof(seg), offset))
261 return false;
262
263 if (swap)
264 swap_segment_command(&seg, NXHostByteOrder());
265
266 struct mach_header_64 header;
267 off_t header_offset;
268
269 if (!walker->CurrentHeader(&header, &header_offset))
270 return false;
271
272 // Process segments that have sections:
273 // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
274 offset += sizeof(struct segment_command);
275 struct section sec;
276 for (unsigned long i = 0; i < seg.nsects; ++i) {
277 if (!walker->ReadBytes(&sec, sizeof(sec), offset))
278 return false;
279
280 if (swap)
281 swap_section(&sec, 1, NXHostByteOrder());
282
283 // sections of type S_ZEROFILL are "virtual" and contain no data
284 // in the file itself
285 if ((sec.flags & SECTION_TYPE) != S_ZEROFILL && sec.offset != 0)
286 macho_id->Update(walker, header_offset + sec.offset, sec.size);
287
288 offset += sizeof(struct section);
289 }
290 } else if (cmd->cmd == LC_SEGMENT_64) {
291 struct segment_command_64 seg64;
292
293 if (!walker->ReadBytes(&seg64, sizeof(seg64), offset))
294 return false;
295
296 if (swap)
297 breakpad_swap_segment_command_64(&seg64, NXHostByteOrder());
298
299 struct mach_header_64 header;
300 off_t header_offset;
301
302 if (!walker->CurrentHeader(&header, &header_offset))
303 return false;
304
305 // Process segments that have sections:
306 // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
307 offset += sizeof(struct segment_command_64);
308 struct section_64 sec64;
309 for (unsigned long i = 0; i < seg64.nsects; ++i) {
310 if (!walker->ReadBytes(&sec64, sizeof(sec64), offset))
311 return false;
312
313 if (swap)
314 breakpad_swap_section_64(&sec64, 1, NXHostByteOrder());
315
316 // sections of type S_ZEROFILL are "virtual" and contain no data
317 // in the file itself
318 if ((sec64.flags & SECTION_TYPE) != S_ZEROFILL && sec64.offset != 0)
319 macho_id->Update(walker,
320 header_offset + sec64.offset,
321 (size_t)sec64.size);
322
323 offset += sizeof(struct section_64);
324 }
325 }
326
327 // Continue processing
328 return true;
329 }
330
331 // static
UUIDWalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)332 bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
333 bool swap, void *context) {
334 if (cmd->cmd == LC_UUID) {
335 struct breakpad_uuid_command *uuid_cmd =
336 (struct breakpad_uuid_command *)context;
337
338 if (!walker->ReadBytes(uuid_cmd, sizeof(struct breakpad_uuid_command),
339 offset))
340 return false;
341
342 if (swap)
343 breakpad_swap_uuid_command(uuid_cmd, NXHostByteOrder());
344
345 return false;
346 }
347
348 // Continue processing
349 return true;
350 }
351
352 // static
IDWalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)353 bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
354 bool swap, void *context) {
355 if (cmd->cmd == LC_ID_DYLIB) {
356 struct dylib_command *dylib_cmd = (struct dylib_command *)context;
357
358 if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset))
359 return false;
360
361 if (swap)
362 swap_dylib_command(dylib_cmd, NXHostByteOrder());
363
364 return false;
365 }
366
367 // Continue processing
368 return true;
369 }
370
371 } // namespace MacFileUtilities
372