• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Demangling of C++ mangled names.                  demangle.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2017 Julian Seward
11       jseward@acm.org
12 
13    Rust demangler components are
14    Copyright (C) 2016-2016 David Tolnay
15       dtolnay@gmail.com
16 
17    This program is free software; you can redistribute it and/or
18    modify it under the terms of the GNU General Public License as
19    published by the Free Software Foundation; either version 2 of the
20    License, or (at your option) any later version.
21 
22    This program is distributed in the hope that it will be useful, but
23    WITHOUT ANY WARRANTY; without even the implied warranty of
24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25    General Public License for more details.
26 
27    You should have received a copy of the GNU General Public License
28    along with this program; if not, write to the Free Software
29    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30    02111-1307, USA.
31 
32    The GNU General Public License is contained in the file COPYING.
33 */
34 
35 #include "pub_core_basics.h"
36 #include "pub_core_demangle.h"
37 #include "pub_core_libcassert.h"
38 #include "pub_core_libcbase.h"
39 #include "pub_core_libcprint.h"
40 #include "pub_core_mallocfree.h"
41 #include "pub_core_options.h"
42 
43 #include "vg_libciface.h"
44 #include "demangle.h"
45 
46 
47 /*------------------------------------------------------------*/
48 /*---                                                      ---*/
49 /*------------------------------------------------------------*/
50 
51 /* The demangler's job is to take a raw symbol name and turn it into
52    something a Human Bean can understand.  Our mangling model
53    comprises a three stage pipeline.  Mangling pushes names forward
54    through the pipeline (0, then 1, then 2) and demangling is
55    obviously the reverse.  In practice it is highly unlikely that a
56    name would require all stages, but it is not impossible either.
57 
58    0. If we're working with Rust, Rust names are lightly mangled by
59       the Rust front end.
60 
61    1. Then the name is subject to standard C++ mangling.
62 
63    2. Optionally, in relatively rare cases, the resulting name is then
64       itself encoded using Z-escaping (see pub_core_redir.h) so as to
65       become part of a redirect-specification.
66 
67    Therefore, VG_(demangle) first tries to undo (2).  If successful,
68    the soname part is discarded (humans don't want to see that).
69    Then, it tries to undo (1) (using demangling code from GNU/FSF) and
70    finally it tries to undo (0).
71 
72    Finally, it changes the name of all symbols which are known to be
73    functions below main() to "(below main)".  This helps reduce
74    variability of stack traces, something which has been a problem for
75    the testsuite for a long time.
76 
77    --------
78    If do_cxx_demangling == True, it does all the above stages:
79    - undo (2) [Z-encoding]
80    - undo (1) [C++ mangling]
81    - if (1) succeeds, undo (0) [Rust mangling]
82    - do the below-main hack
83 
84    Rust demangling (0) is only done if C++ demangling (1) succeeds
85    because Rust demangling is performed in-place, and it is difficult
86    to prove that we "own" the storage -- hence, that the in-place
87    operation is safe -- unless it is clear that it has come from the
88    C++ demangler, which returns its output in a heap-allocated buffer
89    which we can be sure we own.  In practice (Nov 2016) this does not
90    seem to be a problem, since the Rust compiler appears to apply C++
91    mangling after Rust mangling, so we never encounter symbols that
92    require Rust demangling but not C++ demangling.
93 
94    If do_cxx_demangling == False, the C++ and Rust stages are skipped:
95    - undo (2) [Z-encoding]
96    - do the below-main hack
97 */
98 
99 /* Note that the C++ demangler is from GNU libiberty and is almost
100    completely unmodified.  We use vg_libciface.h as a way to
101    impedance-match the libiberty code into our own framework.
102 
103    The libiberty code included here was taken from the GCC repository
104    and is released under the LGPL 2.1 license, which AFAICT is compatible
105    with "GPL 2 or later" and so is OK for inclusion in Valgrind.
106 
107    To update to a newer libiberty, use the "update-demangler" script
108    which is included in the valgrind repository. */
109 
110 /* This is the main, standard demangler entry point. */
111 
112 /* Upon return, *RESULT will point to the demangled name.
113    The memory buffer that holds the demangled name is allocated on the
114    heap and will be deallocated in the next invocation. Conceptually,
115    that buffer is owned by VG_(demangle). That means two things:
116    (1) Users of VG_(demangle) must not free that buffer.
117    (2) If the demangled name needs to be stashed away for later use,
118        the contents of the buffer need to be copied. It is not sufficient
119        to just store the pointer as it will point to deallocated memory
120        after the next VG_(demangle) invocation. */
VG_(demangle)121 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
122                      /* IN */  const HChar  *orig,
123                      /* OUT */ const HChar **result )
124 {
125    /* Possibly undo (2) */
126    /* Z-Demangling was requested.
127       The fastest way to see if it's a Z-mangled name is just to attempt
128       to Z-demangle it (with NULL for the soname buffer, since we're not
129       interested in that). */
130    if (do_z_demangling) {
131       const HChar *z_demangled;
132 
133       if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/
134                                  &z_demangled, NULL, NULL, NULL )) {
135          orig = z_demangled;
136       }
137    }
138 
139    /* Possibly undo (1) */
140    if (do_cxx_demangling && VG_(clo_demangle)
141        && orig != NULL && orig[0] == '_' && orig[1] == 'Z') {
142       /* !!! vvv STATIC vvv !!! */
143       static HChar* demangled = NULL;
144       /* !!! ^^^ STATIC ^^^ !!! */
145 
146       /* Free up previously demangled name */
147       if (demangled) {
148          VG_(arena_free) (VG_AR_DEMANGLE, demangled);
149          demangled = NULL;
150       }
151       demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
152 
153       *result = (demangled == NULL) ? orig : demangled;
154 
155       if (demangled) {
156          /* Possibly undo (0).  This is the only place where it is
157             safe, from a storage management perspective, to
158             Rust-demangle the symbol.  That's because Rust demangling
159             happens in place, so we need to be sure that the storage
160             it is happening in is actually owned by us, and non-const.
161             In this case, the value returned by ML_(cplus_demangle)
162             does have that property. */
163          if (rust_is_mangled(demangled)) {
164             rust_demangle_sym(demangled);
165          }
166          *result = demangled;
167       } else {
168          *result = orig;
169       }
170 
171    } else {
172       *result = orig;
173    }
174 
175    // 13 Mar 2005: We used to check here that the demangler wasn't leaking
176    // by calling the (now-removed) function VG_(is_empty_arena)().  But,
177    // very rarely (ie. I've heard of it twice in 3 years), the demangler
178    // does leak.  But, we can't do much about it, and it's not a disaster,
179    // so we just let it slide without aborting or telling the user.
180 }
181 
182 
183 /*------------------------------------------------------------*/
184 /*--- DEMANGLE Z-ENCODED NAMES                             ---*/
185 /*------------------------------------------------------------*/
186 
187 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
188    Z-encoded names are used by Valgrind for doing function
189    interception/wrapping.
190 
191    Demangle 'sym' into its soname and fnname parts, putting them in
192    the specified buffers.  Returns a Bool indicating whether the
193    demangled failed or not.  A failure can occur because the prefix
194    isn't recognised, the internal Z-escaping is wrong, or because one
195    or the other (or both) of the output buffers becomes full.  Passing
196    'so' as NULL is acceptable if the caller is only interested in the
197    function name part. */
198 
VG_(maybe_Z_demangle)199 Bool VG_(maybe_Z_demangle) ( const HChar* sym,
200                              /*OUT*/const HChar** so,
201                              /*OUT*/const HChar** fn,
202                              /*OUT*/Bool* isWrap,
203                              /*OUT*/Int*  eclassTag,
204                              /*OUT*/Int*  eclassPrio )
205 {
206    static HChar *sobuf;
207    static HChar *fnbuf;
208    static SizeT  buf_len = 0;
209 
210    /* The length of the name after undoing Z-encoding is always smaller
211       than the mangled name. Making the soname and fnname buffers as large
212       as the demangled name is therefore always safe and overflow can never
213       occur. */
214    SizeT len = VG_(strlen)(sym) + 1;
215 
216    if (buf_len < len) {
217       sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len);
218       fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len);
219       buf_len = len;
220    }
221    sobuf[0] = fnbuf[0] = '\0';
222 
223    if (so)
224      *so = sobuf;
225    *fn = fnbuf;
226 
227 #  define EMITSO(ch)                           \
228       do {                                     \
229          if (so) {                             \
230             sobuf[soi++] = ch; sobuf[soi] = 0; \
231          }                                     \
232       } while (0)
233 #  define EMITFN(ch)                           \
234       do {                                     \
235          fnbuf[fni++] = ch; fnbuf[fni] = 0;    \
236       } while (0)
237 
238    Bool error, valid, fn_is_encoded, is_VG_Z_prefixed;
239    Int  soi, fni, i;
240 
241    error = False;
242    soi = 0;
243    fni = 0;
244 
245    valid =     sym[0] == '_'
246            &&  sym[1] == 'v'
247            &&  sym[2] == 'g'
248            && (sym[3] == 'r' || sym[3] == 'w')
249            &&  VG_(isdigit)(sym[4])
250            &&  VG_(isdigit)(sym[5])
251            &&  VG_(isdigit)(sym[6])
252            &&  VG_(isdigit)(sym[7])
253            &&  VG_(isdigit)(sym[8])
254            &&  sym[9] == 'Z'
255            && (sym[10] == 'Z' || sym[10] == 'U')
256            &&  sym[11] == '_';
257 
258    if (valid
259        && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
260        && sym[8] != '0') {
261       /* If the eclass tag is 0000 (meaning "no eclass"), the priority
262          must be 0 too. */
263       valid = False;
264    }
265 
266    if (!valid)
267       return False;
268 
269    fn_is_encoded = sym[10] == 'Z';
270 
271    if (isWrap)
272       *isWrap = sym[3] == 'w';
273 
274    if (eclassTag) {
275       *eclassTag =    1000 * ((Int)sym[4] - '0')
276                    +  100 * ((Int)sym[5] - '0')
277                    +  10 * ((Int)sym[6] - '0')
278                    +  1 * ((Int)sym[7] - '0');
279       vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
280    }
281 
282    if (eclassPrio) {
283       *eclassPrio = ((Int)sym[8]) - '0';
284       vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
285    }
286 
287    /* Now check the soname prefix isn't "VG_Z_", as described in
288       pub_tool_redir.h. */
289    is_VG_Z_prefixed =
290       sym[12] == 'V' &&
291       sym[13] == 'G' &&
292       sym[14] == '_' &&
293       sym[15] == 'Z' &&
294       sym[16] == '_';
295    if (is_VG_Z_prefixed) {
296       vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
297                     "see pub_tool_redir.h for an explanation.", sym);
298    }
299 
300    /* Now scan the Z-encoded soname. */
301    i = 12;
302    while (True) {
303 
304       if (sym[i] == '_')
305       /* Found the delimiter.  Move on to the fnname loop. */
306          break;
307 
308       if (sym[i] == 0) {
309          error = True;
310          goto out;
311       }
312 
313       if (sym[i] != 'Z') {
314          EMITSO(sym[i]);
315          i++;
316          continue;
317       }
318 
319       /* We've got a Z-escape. */
320       i++;
321       switch (sym[i]) {
322          case 'a': EMITSO('*'); break;
323          case 'c': EMITSO(':'); break;
324          case 'd': EMITSO('.'); break;
325          case 'h': EMITSO('-'); break;
326          case 'p': EMITSO('+'); break;
327          case 's': EMITSO(' '); break;
328          case 'u': EMITSO('_'); break;
329          case 'A': EMITSO('@'); break;
330          case 'D': EMITSO('$'); break;
331          case 'L': EMITSO('('); break;
332          case 'P': EMITSO('%'); break;
333          case 'R': EMITSO(')'); break;
334          case 'S': EMITSO('/'); break;
335          case 'Z': EMITSO('Z'); break;
336          default: error = True; goto out;
337       }
338       i++;
339    }
340 
341    vg_assert(sym[i] == '_');
342    i++;
343 
344    /* Now deal with the function name part. */
345    if (!fn_is_encoded) {
346 
347       /* simple; just copy. */
348       while (True) {
349          if (sym[i] == 0)
350             break;
351          EMITFN(sym[i]);
352          i++;
353       }
354       goto out;
355 
356    }
357 
358    /* else use a Z-decoding loop like with soname */
359    while (True) {
360 
361       if (sym[i] == 0)
362          break;
363 
364       if (sym[i] != 'Z') {
365          EMITFN(sym[i]);
366          i++;
367          continue;
368       }
369 
370       /* We've got a Z-escape. */
371       i++;
372       switch (sym[i]) {
373          case 'a': EMITFN('*'); break;
374          case 'c': EMITFN(':'); break;
375          case 'd': EMITFN('.'); break;
376          case 'h': EMITFN('-'); break;
377          case 'p': EMITFN('+'); break;
378          case 's': EMITFN(' '); break;
379          case 'u': EMITFN('_'); break;
380          case 'A': EMITFN('@'); break;
381          case 'D': EMITFN('$'); break;
382          case 'L': EMITFN('('); break;
383          case 'P': EMITFN('%'); break;
384          case 'R': EMITFN(')'); break;
385          case 'S': EMITFN('/'); break;
386          case 'Z': EMITFN('Z'); break;
387          default: error = True; goto out;
388       }
389       i++;
390    }
391 
392   out:
393    EMITSO(0);
394    EMITFN(0);
395 
396    if (error) {
397       /* Something's wrong.  Give up. */
398       VG_(message)(Vg_UserMsg,
399                    "m_demangle: error Z-demangling: %s\n", sym);
400       return False;
401    }
402 
403    return True;
404 }
405 
406 
407 /*--------------------------------------------------------------------*/
408 /*--- end                                                          ---*/
409 /*--------------------------------------------------------------------*/
410