//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is shared between AddressSanitizer and ThreadSanitizer
// run-time libraries. See sanitizer_symbolizer.h for details.
//===----------------------------------------------------------------------===//
13 
14 #include "sanitizer_allocator_internal.h"
15 #include "sanitizer_common.h"
16 #include "sanitizer_placement_new.h"
17 #include "sanitizer_procmaps.h"
18 #include "sanitizer_symbolizer.h"
19 
20 namespace __sanitizer {
21 
Clear()22 void AddressInfo::Clear() {
23   InternalFree(module);
24   InternalFree(function);
25   InternalFree(file);
26   internal_memset(this, 0, sizeof(AddressInfo));
27 }
28 
LoadedModule(const char * module_name,uptr base_address)29 LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
30   full_name_ = internal_strdup(module_name);
31   base_address_ = base_address;
32   n_ranges_ = 0;
33 }
34 
addAddressRange(uptr beg,uptr end)35 void LoadedModule::addAddressRange(uptr beg, uptr end) {
36   CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
37   ranges_[n_ranges_].beg = beg;
38   ranges_[n_ranges_].end = end;
39   n_ranges_++;
40 }
41 
containsAddress(uptr address) const42 bool LoadedModule::containsAddress(uptr address) const {
43   for (uptr i = 0; i < n_ranges_; i++) {
44     if (ranges_[i].beg <= address && address < ranges_[i].end)
45       return true;
46   }
47   return false;
48 }
49 
50 // Extracts the prefix of "str" that consists of any characters not
51 // present in "delims" string, and copies this prefix to "result", allocating
52 // space for it.
53 // Returns a pointer to "str" after skipping extracted prefix and first
54 // delimiter char.
ExtractToken(const char * str,const char * delims,char ** result)55 static const char *ExtractToken(const char *str, const char *delims,
56                                 char **result) {
57   uptr prefix_len = internal_strcspn(str, delims);
58   *result = (char*)InternalAlloc(prefix_len + 1);
59   internal_memcpy(*result, str, prefix_len);
60   (*result)[prefix_len] = '\0';
61   const char *prefix_end = str + prefix_len;
62   if (*prefix_end != '\0') prefix_end++;
63   return prefix_end;
64 }
65 
66 // Same as ExtractToken, but converts extracted token to integer.
ExtractInt(const char * str,const char * delims,int * result)67 static const char *ExtractInt(const char *str, const char *delims,
68                               int *result) {
69   char *buff;
70   const char *ret = ExtractToken(str, delims, &buff);
71   if (buff != 0) {
72     *result = (int)internal_atoll(buff);
73   }
74   InternalFree(buff);
75   return ret;
76 }
77 
ExtractUptr(const char * str,const char * delims,uptr * result)78 static const char *ExtractUptr(const char *str, const char *delims,
79                                uptr *result) {
80   char *buff;
81   const char *ret = ExtractToken(str, delims, &buff);
82   if (buff != 0) {
83     *result = (uptr)internal_atoll(buff);
84   }
85   InternalFree(buff);
86   return ret;
87 }
88 
89 // ExternalSymbolizer encapsulates communication between the tool and
90 // external symbolizer program, running in a different subprocess,
91 // For now we assume the following protocol:
92 // For each request of the form
93 //   <module_name> <module_offset>
94 // passed to STDIN, external symbolizer prints to STDOUT response:
95 //   <function_name>
96 //   <file_name>:<line_number>:<column_number>
97 //   <function_name>
98 //   <file_name>:<line_number>:<column_number>
99 //   ...
100 //   <empty line>
101 class ExternalSymbolizer {
102  public:
ExternalSymbolizer(const char * path,int input_fd,int output_fd)103   ExternalSymbolizer(const char *path, int input_fd, int output_fd)
104       : path_(path),
105         input_fd_(input_fd),
106         output_fd_(output_fd),
107         times_restarted_(0) {
108     CHECK(path_);
109     CHECK_NE(input_fd_, kInvalidFd);
110     CHECK_NE(output_fd_, kInvalidFd);
111   }
112 
SendCommand(bool is_data,const char * module_name,uptr module_offset)113   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
114     CHECK(module_name);
115     internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
116                       is_data ? "DATA " : "", module_name, module_offset);
117     if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
118       return 0;
119     if (!readFromSymbolizer(buffer_, kBufferSize))
120       return 0;
121     return buffer_;
122   }
123 
Restart()124   bool Restart() {
125     if (times_restarted_ >= kMaxTimesRestarted) return false;
126     times_restarted_++;
127     internal_close(input_fd_);
128     internal_close(output_fd_);
129     return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
130   }
131 
Flush()132   void Flush() {
133   }
134 
135  private:
readFromSymbolizer(char * buffer,uptr max_length)136   bool readFromSymbolizer(char *buffer, uptr max_length) {
137     if (max_length == 0)
138       return true;
139     uptr read_len = 0;
140     while (true) {
141       uptr just_read = internal_read(input_fd_, buffer + read_len,
142                                      max_length - read_len);
143       // We can't read 0 bytes, as we don't expect external symbolizer to close
144       // its stdout.
145       if (just_read == 0 || just_read == (uptr)-1) {
146         Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
147         return false;
148       }
149       read_len += just_read;
150       // Empty line marks the end of symbolizer output.
151       if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
152                            buffer[read_len - 2] == '\n') {
153         break;
154       }
155     }
156     return true;
157   }
158 
writeToSymbolizer(const char * buffer,uptr length)159   bool writeToSymbolizer(const char *buffer, uptr length) {
160     if (length == 0)
161       return true;
162     uptr write_len = internal_write(output_fd_, buffer, length);
163     if (write_len == 0 || write_len == (uptr)-1) {
164       Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
165       return false;
166     }
167     return true;
168   }
169 
170   const char *path_;
171   int input_fd_;
172   int output_fd_;
173 
174   static const uptr kBufferSize = 16 * 1024;
175   char buffer_[kBufferSize];
176 
177   static const uptr kMaxTimesRestarted = 5;
178   uptr times_restarted_;
179 };
180 
181 static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
182 
183 #if SANITIZER_SUPPORTS_WEAK_HOOKS
184 extern "C" {
185 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
186 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
187                                 char *Buffer, int MaxLength);
188 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
189 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
190                                 char *Buffer, int MaxLength);
191 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
192 void __sanitizer_symbolize_flush();
193 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
194 int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
195                                    int MaxLength);
196 }  // extern "C"
197 
198 class InternalSymbolizer {
199  public:
200   typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
201 
get()202   static InternalSymbolizer *get() {
203     if (__sanitizer_symbolize_code != 0 &&
204         __sanitizer_symbolize_data != 0) {
205       void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
206       return new(mem) InternalSymbolizer();
207     }
208     return 0;
209   }
210 
SendCommand(bool is_data,const char * module_name,uptr module_offset)211   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
212     SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
213                                                 : __sanitizer_symbolize_code;
214     if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
215       return buffer_;
216     return 0;
217   }
218 
Flush()219   void Flush() {
220     if (__sanitizer_symbolize_flush)
221       __sanitizer_symbolize_flush();
222   }
223 
Demangle(const char * name)224   const char *Demangle(const char *name) {
225     if (__sanitizer_symbolize_demangle) {
226       for (uptr res_length = 1024;
227            res_length <= InternalSizeClassMap::kMaxSize;) {
228         char *res_buff = static_cast<char*>(InternalAlloc(res_length));
229         uptr req_length =
230             __sanitizer_symbolize_demangle(name, res_buff, res_length);
231         if (req_length > res_length) {
232           res_length = req_length + 1;
233           InternalFree(res_buff);
234           continue;
235         }
236         return res_buff;
237       }
238     }
239     return name;
240   }
241 
242  private:
InternalSymbolizer()243   InternalSymbolizer() { }
244 
245   static const int kBufferSize = 16 * 1024;
246   static const int kMaxDemangledNameSize = 1024;
247   char buffer_[kBufferSize];
248 };
249 #else  // SANITIZER_SUPPORTS_WEAK_HOOKS
250 
251 class InternalSymbolizer {
252  public:
get()253   static InternalSymbolizer *get() { return 0; }
SendCommand(bool is_data,const char * module_name,uptr module_offset)254   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
255     return 0;
256   }
Flush()257   void Flush() { }
Demangle(const char * name)258   const char *Demangle(const char *name) { return name; }
259 };
260 
261 #endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
262 
263 class Symbolizer {
264   // This class has no constructor, as global constructors are forbidden in
265   // sanitizer_common. It should be linker initialized instead.
266  public:
SymbolizeCode(uptr addr,AddressInfo * frames,uptr max_frames)267   uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
268     if (max_frames == 0)
269       return 0;
270     LoadedModule *module = FindModuleForAddress(addr);
271     if (module == 0)
272       return 0;
273     const char *module_name = module->full_name();
274     uptr module_offset = addr - module->base_address();
275     const char *str = SendCommand(false, module_name, module_offset);
276     if (str == 0) {
277       // External symbolizer was not initialized or failed. Fill only data
278       // about module name and offset.
279       AddressInfo *info = &frames[0];
280       info->Clear();
281       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
282       return 1;
283     }
284     uptr frame_id = 0;
285     for (frame_id = 0; frame_id < max_frames; frame_id++) {
286       AddressInfo *info = &frames[frame_id];
287       char *function_name = 0;
288       str = ExtractToken(str, "\n", &function_name);
289       CHECK(function_name);
290       if (function_name[0] == '\0') {
291         // There are no more frames.
292         break;
293       }
294       info->Clear();
295       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
296       info->function = function_name;
297       // Parse <file>:<line>:<column> buffer.
298       char *file_line_info = 0;
299       str = ExtractToken(str, "\n", &file_line_info);
300       CHECK(file_line_info);
301       const char *line_info = ExtractToken(file_line_info, ":", &info->file);
302       line_info = ExtractInt(line_info, ":", &info->line);
303       line_info = ExtractInt(line_info, "", &info->column);
304       InternalFree(file_line_info);
305 
306       // Functions and filenames can be "??", in which case we write 0
307       // to address info to mark that names are unknown.
308       if (0 == internal_strcmp(info->function, "??")) {
309         InternalFree(info->function);
310         info->function = 0;
311       }
312       if (0 == internal_strcmp(info->file, "??")) {
313         InternalFree(info->file);
314         info->file = 0;
315       }
316     }
317     if (frame_id == 0) {
318       // Make sure we return at least one frame.
319       AddressInfo *info = &frames[0];
320       info->Clear();
321       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
322       frame_id = 1;
323     }
324     return frame_id;
325   }
326 
SymbolizeData(uptr addr,DataInfo * info)327   bool SymbolizeData(uptr addr, DataInfo *info) {
328     LoadedModule *module = FindModuleForAddress(addr);
329     if (module == 0)
330       return false;
331     const char *module_name = module->full_name();
332     uptr module_offset = addr - module->base_address();
333     internal_memset(info, 0, sizeof(*info));
334     info->address = addr;
335     info->module = internal_strdup(module_name);
336     info->module_offset = module_offset;
337     const char *str = SendCommand(true, module_name, module_offset);
338     if (str == 0)
339       return true;
340     str = ExtractToken(str, "\n", &info->name);
341     str = ExtractUptr(str, " ", &info->start);
342     str = ExtractUptr(str, "\n", &info->size);
343     info->start += module->base_address();
344     return true;
345   }
346 
InitializeExternalSymbolizer(const char * path_to_symbolizer)347   bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
348     int input_fd, output_fd;
349     if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
350       return false;
351     void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
352     external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
353                                                        input_fd, output_fd);
354     return true;
355   }
356 
IsSymbolizerAvailable()357   bool IsSymbolizerAvailable() {
358     if (internal_symbolizer_ == 0)
359       internal_symbolizer_ = InternalSymbolizer::get();
360     return internal_symbolizer_ || external_symbolizer_;
361   }
362 
Flush()363   void Flush() {
364     if (internal_symbolizer_)
365       internal_symbolizer_->Flush();
366     if (external_symbolizer_)
367       external_symbolizer_->Flush();
368   }
369 
Demangle(const char * name)370   const char *Demangle(const char *name) {
371     if (IsSymbolizerAvailable() && internal_symbolizer_ != 0)
372       return internal_symbolizer_->Demangle(name);
373     return DemangleCXXABI(name);
374   }
375 
376  private:
SendCommand(bool is_data,const char * module_name,uptr module_offset)377   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
378     // First, try to use internal symbolizer.
379     if (!IsSymbolizerAvailable()) {
380       return 0;
381     }
382     if (internal_symbolizer_) {
383       return internal_symbolizer_->SendCommand(is_data, module_name,
384                                                module_offset);
385     }
386     // Otherwise, fall back to external symbolizer.
387     if (external_symbolizer_ == 0) {
388       ReportExternalSymbolizerError(
389           "WARNING: Trying to symbolize code, but external "
390           "symbolizer is not initialized!\n");
391       return 0;
392     }
393     for (;;) {
394       char *reply = external_symbolizer_->SendCommand(is_data, module_name,
395           module_offset);
396       if (reply)
397         return reply;
398       // Try to restart symbolizer subprocess. If we don't succeed, forget
399       // about it and don't try to use it later.
400       if (!external_symbolizer_->Restart()) {
401         ReportExternalSymbolizerError(
402             "WARNING: Failed to use and restart external symbolizer!\n");
403         external_symbolizer_ = 0;
404         return 0;
405       }
406     }
407   }
408 
FindModuleForAddress(uptr address)409   LoadedModule *FindModuleForAddress(uptr address) {
410     bool modules_were_reloaded = false;
411     if (modules_ == 0 || !modules_fresh_) {
412       modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
413           kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
414       CHECK(modules_);
415       n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
416                                     /* filter */ 0);
417       // FIXME: Return this check when GetListOfModules is implemented on Mac.
418       // CHECK_GT(n_modules_, 0);
419       CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
420       modules_fresh_ = true;
421       modules_were_reloaded = true;
422     }
423     for (uptr i = 0; i < n_modules_; i++) {
424       if (modules_[i].containsAddress(address)) {
425         return &modules_[i];
426       }
427     }
428     // Reload the modules and look up again, if we haven't tried it yet.
429     if (!modules_were_reloaded) {
430       // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
431       // It's too aggressive to reload the list of modules each time we fail
432       // to find a module for a given address.
433       modules_fresh_ = false;
434       return FindModuleForAddress(address);
435     }
436     return 0;
437   }
438 
ReportExternalSymbolizerError(const char * msg)439   void ReportExternalSymbolizerError(const char *msg) {
440     // Don't use atomics here for now, as SymbolizeCode can't be called
441     // from multiple threads anyway.
442     static bool reported;
443     if (!reported) {
444       Report(msg);
445       reported = true;
446     }
447   }
448 
449   // 16K loaded modules should be enough for everyone.
450   static const uptr kMaxNumberOfModuleContexts = 1 << 14;
451   LoadedModule *modules_;  // Array of module descriptions is leaked.
452   uptr n_modules_;
453   // If stale, need to reload the modules before looking up addresses.
454   bool modules_fresh_;
455 
456   ExternalSymbolizer *external_symbolizer_;  // Leaked.
457   InternalSymbolizer *internal_symbolizer_;  // Leaked.
458 };
459 
460 static Symbolizer symbolizer;  // Linker initialized.
461 
SymbolizeCode(uptr address,AddressInfo * frames,uptr max_frames)462 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
463   return symbolizer.SymbolizeCode(address, frames, max_frames);
464 }
465 
SymbolizeData(uptr address,DataInfo * info)466 bool SymbolizeData(uptr address, DataInfo *info) {
467   return symbolizer.SymbolizeData(address, info);
468 }
469 
InitializeExternalSymbolizer(const char * path_to_symbolizer)470 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
471   return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
472 }
473 
IsSymbolizerAvailable()474 bool IsSymbolizerAvailable() {
475   return symbolizer.IsSymbolizerAvailable();
476 }
477 
FlushSymbolizer()478 void FlushSymbolizer() {
479   symbolizer.Flush();
480 }
481 
Demangle(const char * name)482 const char *Demangle(const char *name) {
483   return symbolizer.Demangle(name);
484 }
485 
486 }  // namespace __sanitizer
487