1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is shared between AddressSanitizer and ThreadSanitizer
11 // run-time libraries. See sanitizer_symbolizer.h for details.
12 //===----------------------------------------------------------------------===//
13
14 #include "sanitizer_allocator_internal.h"
15 #include "sanitizer_common.h"
16 #include "sanitizer_placement_new.h"
17 #include "sanitizer_procmaps.h"
18 #include "sanitizer_symbolizer.h"
19
20 namespace __sanitizer {
21
Clear()22 void AddressInfo::Clear() {
23 InternalFree(module);
24 InternalFree(function);
25 InternalFree(file);
26 internal_memset(this, 0, sizeof(AddressInfo));
27 }
28
LoadedModule(const char * module_name,uptr base_address)29 LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
30 full_name_ = internal_strdup(module_name);
31 base_address_ = base_address;
32 n_ranges_ = 0;
33 }
34
addAddressRange(uptr beg,uptr end)35 void LoadedModule::addAddressRange(uptr beg, uptr end) {
36 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
37 ranges_[n_ranges_].beg = beg;
38 ranges_[n_ranges_].end = end;
39 n_ranges_++;
40 }
41
containsAddress(uptr address) const42 bool LoadedModule::containsAddress(uptr address) const {
43 for (uptr i = 0; i < n_ranges_; i++) {
44 if (ranges_[i].beg <= address && address < ranges_[i].end)
45 return true;
46 }
47 return false;
48 }
49
50 // Extracts the prefix of "str" that consists of any characters not
51 // present in "delims" string, and copies this prefix to "result", allocating
52 // space for it.
53 // Returns a pointer to "str" after skipping extracted prefix and first
54 // delimiter char.
ExtractToken(const char * str,const char * delims,char ** result)55 static const char *ExtractToken(const char *str, const char *delims,
56 char **result) {
57 uptr prefix_len = internal_strcspn(str, delims);
58 *result = (char*)InternalAlloc(prefix_len + 1);
59 internal_memcpy(*result, str, prefix_len);
60 (*result)[prefix_len] = '\0';
61 const char *prefix_end = str + prefix_len;
62 if (*prefix_end != '\0') prefix_end++;
63 return prefix_end;
64 }
65
66 // Same as ExtractToken, but converts extracted token to integer.
ExtractInt(const char * str,const char * delims,int * result)67 static const char *ExtractInt(const char *str, const char *delims,
68 int *result) {
69 char *buff;
70 const char *ret = ExtractToken(str, delims, &buff);
71 if (buff != 0) {
72 *result = (int)internal_atoll(buff);
73 }
74 InternalFree(buff);
75 return ret;
76 }
77
ExtractUptr(const char * str,const char * delims,uptr * result)78 static const char *ExtractUptr(const char *str, const char *delims,
79 uptr *result) {
80 char *buff;
81 const char *ret = ExtractToken(str, delims, &buff);
82 if (buff != 0) {
83 *result = (uptr)internal_atoll(buff);
84 }
85 InternalFree(buff);
86 return ret;
87 }
88
89 // ExternalSymbolizer encapsulates communication between the tool and
90 // external symbolizer program, running in a different subprocess,
91 // For now we assume the following protocol:
92 // For each request of the form
93 // <module_name> <module_offset>
94 // passed to STDIN, external symbolizer prints to STDOUT response:
95 // <function_name>
96 // <file_name>:<line_number>:<column_number>
97 // <function_name>
98 // <file_name>:<line_number>:<column_number>
99 // ...
100 // <empty line>
101 class ExternalSymbolizer {
102 public:
ExternalSymbolizer(const char * path,int input_fd,int output_fd)103 ExternalSymbolizer(const char *path, int input_fd, int output_fd)
104 : path_(path),
105 input_fd_(input_fd),
106 output_fd_(output_fd),
107 times_restarted_(0) {
108 CHECK(path_);
109 CHECK_NE(input_fd_, kInvalidFd);
110 CHECK_NE(output_fd_, kInvalidFd);
111 }
112
SendCommand(bool is_data,const char * module_name,uptr module_offset)113 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
114 CHECK(module_name);
115 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
116 is_data ? "DATA " : "", module_name, module_offset);
117 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
118 return 0;
119 if (!readFromSymbolizer(buffer_, kBufferSize))
120 return 0;
121 return buffer_;
122 }
123
Restart()124 bool Restart() {
125 if (times_restarted_ >= kMaxTimesRestarted) return false;
126 times_restarted_++;
127 internal_close(input_fd_);
128 internal_close(output_fd_);
129 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
130 }
131
Flush()132 void Flush() {
133 }
134
135 private:
readFromSymbolizer(char * buffer,uptr max_length)136 bool readFromSymbolizer(char *buffer, uptr max_length) {
137 if (max_length == 0)
138 return true;
139 uptr read_len = 0;
140 while (true) {
141 uptr just_read = internal_read(input_fd_, buffer + read_len,
142 max_length - read_len);
143 // We can't read 0 bytes, as we don't expect external symbolizer to close
144 // its stdout.
145 if (just_read == 0 || just_read == (uptr)-1) {
146 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
147 return false;
148 }
149 read_len += just_read;
150 // Empty line marks the end of symbolizer output.
151 if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
152 buffer[read_len - 2] == '\n') {
153 break;
154 }
155 }
156 return true;
157 }
158
writeToSymbolizer(const char * buffer,uptr length)159 bool writeToSymbolizer(const char *buffer, uptr length) {
160 if (length == 0)
161 return true;
162 uptr write_len = internal_write(output_fd_, buffer, length);
163 if (write_len == 0 || write_len == (uptr)-1) {
164 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
165 return false;
166 }
167 return true;
168 }
169
170 const char *path_;
171 int input_fd_;
172 int output_fd_;
173
174 static const uptr kBufferSize = 16 * 1024;
175 char buffer_[kBufferSize];
176
177 static const uptr kMaxTimesRestarted = 5;
178 uptr times_restarted_;
179 };
180
// Backs the symbolizer objects and the module table created in this file;
// nothing obtained from it is released here.
static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
182
183 #if SANITIZER_SUPPORTS_WEAK_HOOKS
184 extern "C" {
185 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
186 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
187 char *Buffer, int MaxLength);
188 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
189 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
190 char *Buffer, int MaxLength);
191 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
192 void __sanitizer_symbolize_flush();
193 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
194 int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
195 int MaxLength);
196 } // extern "C"
197
198 class InternalSymbolizer {
199 public:
200 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
201
get()202 static InternalSymbolizer *get() {
203 if (__sanitizer_symbolize_code != 0 &&
204 __sanitizer_symbolize_data != 0) {
205 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
206 return new(mem) InternalSymbolizer();
207 }
208 return 0;
209 }
210
SendCommand(bool is_data,const char * module_name,uptr module_offset)211 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
212 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
213 : __sanitizer_symbolize_code;
214 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
215 return buffer_;
216 return 0;
217 }
218
Flush()219 void Flush() {
220 if (__sanitizer_symbolize_flush)
221 __sanitizer_symbolize_flush();
222 }
223
Demangle(const char * name)224 const char *Demangle(const char *name) {
225 if (__sanitizer_symbolize_demangle) {
226 for (uptr res_length = 1024;
227 res_length <= InternalSizeClassMap::kMaxSize;) {
228 char *res_buff = static_cast<char*>(InternalAlloc(res_length));
229 uptr req_length =
230 __sanitizer_symbolize_demangle(name, res_buff, res_length);
231 if (req_length > res_length) {
232 res_length = req_length + 1;
233 InternalFree(res_buff);
234 continue;
235 }
236 return res_buff;
237 }
238 }
239 return name;
240 }
241
242 private:
InternalSymbolizer()243 InternalSymbolizer() { }
244
245 static const int kBufferSize = 16 * 1024;
246 static const int kMaxDemangledNameSize = 1024;
247 char buffer_[kBufferSize];
248 };
249 #else // SANITIZER_SUPPORTS_WEAK_HOOKS
250
251 class InternalSymbolizer {
252 public:
get()253 static InternalSymbolizer *get() { return 0; }
SendCommand(bool is_data,const char * module_name,uptr module_offset)254 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
255 return 0;
256 }
Flush()257 void Flush() { }
Demangle(const char * name)258 const char *Demangle(const char *name) { return name; }
259 };
260
261 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS
262
263 class Symbolizer {
264 // This class has no constructor, as global constructors are forbidden in
265 // sanitizer_common. It should be linker initialized instead.
266 public:
SymbolizeCode(uptr addr,AddressInfo * frames,uptr max_frames)267 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
268 if (max_frames == 0)
269 return 0;
270 LoadedModule *module = FindModuleForAddress(addr);
271 if (module == 0)
272 return 0;
273 const char *module_name = module->full_name();
274 uptr module_offset = addr - module->base_address();
275 const char *str = SendCommand(false, module_name, module_offset);
276 if (str == 0) {
277 // External symbolizer was not initialized or failed. Fill only data
278 // about module name and offset.
279 AddressInfo *info = &frames[0];
280 info->Clear();
281 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
282 return 1;
283 }
284 uptr frame_id = 0;
285 for (frame_id = 0; frame_id < max_frames; frame_id++) {
286 AddressInfo *info = &frames[frame_id];
287 char *function_name = 0;
288 str = ExtractToken(str, "\n", &function_name);
289 CHECK(function_name);
290 if (function_name[0] == '\0') {
291 // There are no more frames.
292 break;
293 }
294 info->Clear();
295 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
296 info->function = function_name;
297 // Parse <file>:<line>:<column> buffer.
298 char *file_line_info = 0;
299 str = ExtractToken(str, "\n", &file_line_info);
300 CHECK(file_line_info);
301 const char *line_info = ExtractToken(file_line_info, ":", &info->file);
302 line_info = ExtractInt(line_info, ":", &info->line);
303 line_info = ExtractInt(line_info, "", &info->column);
304 InternalFree(file_line_info);
305
306 // Functions and filenames can be "??", in which case we write 0
307 // to address info to mark that names are unknown.
308 if (0 == internal_strcmp(info->function, "??")) {
309 InternalFree(info->function);
310 info->function = 0;
311 }
312 if (0 == internal_strcmp(info->file, "??")) {
313 InternalFree(info->file);
314 info->file = 0;
315 }
316 }
317 if (frame_id == 0) {
318 // Make sure we return at least one frame.
319 AddressInfo *info = &frames[0];
320 info->Clear();
321 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
322 frame_id = 1;
323 }
324 return frame_id;
325 }
326
SymbolizeData(uptr addr,DataInfo * info)327 bool SymbolizeData(uptr addr, DataInfo *info) {
328 LoadedModule *module = FindModuleForAddress(addr);
329 if (module == 0)
330 return false;
331 const char *module_name = module->full_name();
332 uptr module_offset = addr - module->base_address();
333 internal_memset(info, 0, sizeof(*info));
334 info->address = addr;
335 info->module = internal_strdup(module_name);
336 info->module_offset = module_offset;
337 const char *str = SendCommand(true, module_name, module_offset);
338 if (str == 0)
339 return true;
340 str = ExtractToken(str, "\n", &info->name);
341 str = ExtractUptr(str, " ", &info->start);
342 str = ExtractUptr(str, "\n", &info->size);
343 info->start += module->base_address();
344 return true;
345 }
346
InitializeExternalSymbolizer(const char * path_to_symbolizer)347 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
348 int input_fd, output_fd;
349 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
350 return false;
351 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
352 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
353 input_fd, output_fd);
354 return true;
355 }
356
IsSymbolizerAvailable()357 bool IsSymbolizerAvailable() {
358 if (internal_symbolizer_ == 0)
359 internal_symbolizer_ = InternalSymbolizer::get();
360 return internal_symbolizer_ || external_symbolizer_;
361 }
362
Flush()363 void Flush() {
364 if (internal_symbolizer_)
365 internal_symbolizer_->Flush();
366 if (external_symbolizer_)
367 external_symbolizer_->Flush();
368 }
369
Demangle(const char * name)370 const char *Demangle(const char *name) {
371 if (IsSymbolizerAvailable() && internal_symbolizer_ != 0)
372 return internal_symbolizer_->Demangle(name);
373 return DemangleCXXABI(name);
374 }
375
376 private:
SendCommand(bool is_data,const char * module_name,uptr module_offset)377 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
378 // First, try to use internal symbolizer.
379 if (!IsSymbolizerAvailable()) {
380 return 0;
381 }
382 if (internal_symbolizer_) {
383 return internal_symbolizer_->SendCommand(is_data, module_name,
384 module_offset);
385 }
386 // Otherwise, fall back to external symbolizer.
387 if (external_symbolizer_ == 0) {
388 ReportExternalSymbolizerError(
389 "WARNING: Trying to symbolize code, but external "
390 "symbolizer is not initialized!\n");
391 return 0;
392 }
393 for (;;) {
394 char *reply = external_symbolizer_->SendCommand(is_data, module_name,
395 module_offset);
396 if (reply)
397 return reply;
398 // Try to restart symbolizer subprocess. If we don't succeed, forget
399 // about it and don't try to use it later.
400 if (!external_symbolizer_->Restart()) {
401 ReportExternalSymbolizerError(
402 "WARNING: Failed to use and restart external symbolizer!\n");
403 external_symbolizer_ = 0;
404 return 0;
405 }
406 }
407 }
408
FindModuleForAddress(uptr address)409 LoadedModule *FindModuleForAddress(uptr address) {
410 bool modules_were_reloaded = false;
411 if (modules_ == 0 || !modules_fresh_) {
412 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
413 kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
414 CHECK(modules_);
415 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
416 /* filter */ 0);
417 // FIXME: Return this check when GetListOfModules is implemented on Mac.
418 // CHECK_GT(n_modules_, 0);
419 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
420 modules_fresh_ = true;
421 modules_were_reloaded = true;
422 }
423 for (uptr i = 0; i < n_modules_; i++) {
424 if (modules_[i].containsAddress(address)) {
425 return &modules_[i];
426 }
427 }
428 // Reload the modules and look up again, if we haven't tried it yet.
429 if (!modules_were_reloaded) {
430 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
431 // It's too aggressive to reload the list of modules each time we fail
432 // to find a module for a given address.
433 modules_fresh_ = false;
434 return FindModuleForAddress(address);
435 }
436 return 0;
437 }
438
ReportExternalSymbolizerError(const char * msg)439 void ReportExternalSymbolizerError(const char *msg) {
440 // Don't use atomics here for now, as SymbolizeCode can't be called
441 // from multiple threads anyway.
442 static bool reported;
443 if (!reported) {
444 Report(msg);
445 reported = true;
446 }
447 }
448
449 // 16K loaded modules should be enough for everyone.
450 static const uptr kMaxNumberOfModuleContexts = 1 << 14;
451 LoadedModule *modules_; // Array of module descriptions is leaked.
452 uptr n_modules_;
453 // If stale, need to reload the modules before looking up addresses.
454 bool modules_fresh_;
455
456 ExternalSymbolizer *external_symbolizer_; // Leaked.
457 InternalSymbolizer *internal_symbolizer_; // Leaked.
458 };
459
// The single Symbolizer instance that the free functions below forward to.
static Symbolizer symbolizer;  // Linker initialized.
461
// Forwards to Symbolizer::SymbolizeCode on the file-local instance: fills up
// to "max_frames" entries of "frames" for "address" and returns how many
// were written.
uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
  return symbolizer.SymbolizeCode(address, frames, max_frames);
}
465
// Forwards to Symbolizer::SymbolizeData on the file-local instance. Returns
// false if no loaded module contains "address".
bool SymbolizeData(uptr address, DataInfo *info) {
  return symbolizer.SymbolizeData(address, info);
}
469
// Forwards to Symbolizer::InitializeExternalSymbolizer on the file-local
// instance. Returns false if the symbolizer subprocess could not be started.
bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
  return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
}
473
// Returns true if the file-local Symbolizer has an internal or an external
// symbolizer to use.
bool IsSymbolizerAvailable() {
  return symbolizer.IsSymbolizerAvailable();
}
477
// Forwards to Symbolizer::Flush on the file-local instance.
void FlushSymbolizer() {
  symbolizer.Flush();
}
481
// Forwards to Symbolizer::Demangle on the file-local instance.
const char *Demangle(const char *name) {
  return symbolizer.Demangle(name);
}
485
486 } // namespace __sanitizer
487