1
2 /*--------------------------------------------------------------------*/
3 /*--- Demangling of C++ mangled names. demangle.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2017 Julian Seward
11 jseward@acm.org
12
13 Rust demangler components are
14 Copyright (C) 2016-2016 David Tolnay
15 dtolnay@gmail.com
16
17 This program is free software; you can redistribute it and/or
18 modify it under the terms of the GNU General Public License as
19 published by the Free Software Foundation; either version 2 of the
20 License, or (at your option) any later version.
21
22 This program is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 General Public License for more details.
26
27 You should have received a copy of the GNU General Public License
28 along with this program; if not, write to the Free Software
29 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 02111-1307, USA.
31
32 The GNU General Public License is contained in the file COPYING.
33 */
34
35 #include "pub_core_basics.h"
36 #include "pub_core_demangle.h"
37 #include "pub_core_libcassert.h"
38 #include "pub_core_libcbase.h"
39 #include "pub_core_libcprint.h"
40 #include "pub_core_mallocfree.h"
41 #include "pub_core_options.h"
42
43 #include "vg_libciface.h"
44 #include "demangle.h"
45
46
47 /*------------------------------------------------------------*/
48 /*--- ---*/
49 /*------------------------------------------------------------*/
50
51 /* The demangler's job is to take a raw symbol name and turn it into
52 something a Human Bean can understand. Our mangling model
53 comprises a three stage pipeline. Mangling pushes names forward
54 through the pipeline (0, then 1, then 2) and demangling is
55 obviously the reverse. In practice it is highly unlikely that a
56 name would require all stages, but it is not impossible either.
57
58 0. If we're working with Rust, Rust names are lightly mangled by
59 the Rust front end.
60
61 1. Then the name is subject to standard C++ mangling.
62
63 2. Optionally, in relatively rare cases, the resulting name is then
64 itself encoded using Z-escaping (see pub_core_redir.h) so as to
65 become part of a redirect-specification.
66
67 Therefore, VG_(demangle) first tries to undo (2). If successful,
68 the soname part is discarded (humans don't want to see that).
69 Then, it tries to undo (1) (using demangling code from GNU/FSF) and
70 finally it tries to undo (0).
71
72 Finally, it changes the name of all symbols which are known to be
73 functions below main() to "(below main)". This helps reduce
74 variability of stack traces, something which has been a problem for
75 the testsuite for a long time.
76
77 --------
78 If do_cxx_demangling == True, it does all the above stages:
79 - undo (2) [Z-encoding]
80 - undo (1) [C++ mangling]
81 - if (1) succeeds, undo (0) [Rust mangling]
82 - do the below-main hack
83
84 Rust demangling (0) is only done if C++ demangling (1) succeeds
85 because Rust demangling is performed in-place, and it is difficult
86 to prove that we "own" the storage -- hence, that the in-place
87 operation is safe -- unless it is clear that it has come from the
88 C++ demangler, which returns its output in a heap-allocated buffer
89 which we can be sure we own. In practice (Nov 2016) this does not
90 seem to be a problem, since the Rust compiler appears to apply C++
91 mangling after Rust mangling, so we never encounter symbols that
92 require Rust demangling but not C++ demangling.
93
94 If do_cxx_demangling == False, the C++ and Rust stages are skipped:
95 - undo (2) [Z-encoding]
96 - do the below-main hack
97 */
98
99 /* Note that the C++ demangler is from GNU libiberty and is almost
100 completely unmodified. We use vg_libciface.h as a way to
101 impedance-match the libiberty code into our own framework.
102
103 The libiberty code included here was taken from the GCC repository
104 and is released under the LGPL 2.1 license, which AFAICT is compatible
105 with "GPL 2 or later" and so is OK for inclusion in Valgrind.
106
107 To update to a newer libiberty, use the "update-demangler" script
108 which is included in the valgrind repository. */
109
110 /* This is the main, standard demangler entry point. */
111
112 /* Upon return, *RESULT will point to the demangled name.
113 The memory buffer that holds the demangled name is allocated on the
114 heap and will be deallocated in the next invocation. Conceptually,
115 that buffer is owned by VG_(demangle). That means two things:
116 (1) Users of VG_(demangle) must not free that buffer.
117 (2) If the demangled name needs to be stashed away for later use,
118 the contents of the buffer need to be copied. It is not sufficient
119 to just store the pointer as it will point to deallocated memory
120 after the next VG_(demangle) invocation. */
VG_(demangle)121 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
122 /* IN */ const HChar *orig,
123 /* OUT */ const HChar **result )
124 {
125 /* Possibly undo (2) */
126 /* Z-Demangling was requested.
127 The fastest way to see if it's a Z-mangled name is just to attempt
128 to Z-demangle it (with NULL for the soname buffer, since we're not
129 interested in that). */
130 if (do_z_demangling) {
131 const HChar *z_demangled;
132
133 if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/
134 &z_demangled, NULL, NULL, NULL )) {
135 orig = z_demangled;
136 }
137 }
138
139 /* Possibly undo (1) */
140 if (do_cxx_demangling && VG_(clo_demangle)
141 && orig != NULL && orig[0] == '_' && orig[1] == 'Z') {
142 /* !!! vvv STATIC vvv !!! */
143 static HChar* demangled = NULL;
144 /* !!! ^^^ STATIC ^^^ !!! */
145
146 /* Free up previously demangled name */
147 if (demangled) {
148 VG_(arena_free) (VG_AR_DEMANGLE, demangled);
149 demangled = NULL;
150 }
151 demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
152
153 *result = (demangled == NULL) ? orig : demangled;
154
155 if (demangled) {
156 /* Possibly undo (0). This is the only place where it is
157 safe, from a storage management perspective, to
158 Rust-demangle the symbol. That's because Rust demangling
159 happens in place, so we need to be sure that the storage
160 it is happening in is actually owned by us, and non-const.
161 In this case, the value returned by ML_(cplus_demangle)
162 does have that property. */
163 if (rust_is_mangled(demangled)) {
164 rust_demangle_sym(demangled);
165 }
166 *result = demangled;
167 } else {
168 *result = orig;
169 }
170
171 } else {
172 *result = orig;
173 }
174
175 // 13 Mar 2005: We used to check here that the demangler wasn't leaking
176 // by calling the (now-removed) function VG_(is_empty_arena)(). But,
177 // very rarely (ie. I've heard of it twice in 3 years), the demangler
178 // does leak. But, we can't do much about it, and it's not a disaster,
179 // so we just let it slide without aborting or telling the user.
180 }
181
182
183 /*------------------------------------------------------------*/
184 /*--- DEMANGLE Z-ENCODED NAMES ---*/
185 /*------------------------------------------------------------*/
186
187 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
188 Z-encoded names are used by Valgrind for doing function
189 interception/wrapping.
190
191 Demangle 'sym' into its soname and fnname parts, putting them in
192 the specified buffers. Returns a Bool indicating whether the
193 demangled failed or not. A failure can occur because the prefix
194 isn't recognised, the internal Z-escaping is wrong, or because one
195 or the other (or both) of the output buffers becomes full. Passing
196 'so' as NULL is acceptable if the caller is only interested in the
197 function name part. */
198
VG_(maybe_Z_demangle)199 Bool VG_(maybe_Z_demangle) ( const HChar* sym,
200 /*OUT*/const HChar** so,
201 /*OUT*/const HChar** fn,
202 /*OUT*/Bool* isWrap,
203 /*OUT*/Int* eclassTag,
204 /*OUT*/Int* eclassPrio )
205 {
206 static HChar *sobuf;
207 static HChar *fnbuf;
208 static SizeT buf_len = 0;
209
210 /* The length of the name after undoing Z-encoding is always smaller
211 than the mangled name. Making the soname and fnname buffers as large
212 as the demangled name is therefore always safe and overflow can never
213 occur. */
214 SizeT len = VG_(strlen)(sym) + 1;
215
216 if (buf_len < len) {
217 sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len);
218 fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len);
219 buf_len = len;
220 }
221 sobuf[0] = fnbuf[0] = '\0';
222
223 if (so)
224 *so = sobuf;
225 *fn = fnbuf;
226
227 # define EMITSO(ch) \
228 do { \
229 if (so) { \
230 sobuf[soi++] = ch; sobuf[soi] = 0; \
231 } \
232 } while (0)
233 # define EMITFN(ch) \
234 do { \
235 fnbuf[fni++] = ch; fnbuf[fni] = 0; \
236 } while (0)
237
238 Bool error, valid, fn_is_encoded, is_VG_Z_prefixed;
239 Int soi, fni, i;
240
241 error = False;
242 soi = 0;
243 fni = 0;
244
245 valid = sym[0] == '_'
246 && sym[1] == 'v'
247 && sym[2] == 'g'
248 && (sym[3] == 'r' || sym[3] == 'w')
249 && VG_(isdigit)(sym[4])
250 && VG_(isdigit)(sym[5])
251 && VG_(isdigit)(sym[6])
252 && VG_(isdigit)(sym[7])
253 && VG_(isdigit)(sym[8])
254 && sym[9] == 'Z'
255 && (sym[10] == 'Z' || sym[10] == 'U')
256 && sym[11] == '_';
257
258 if (valid
259 && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
260 && sym[8] != '0') {
261 /* If the eclass tag is 0000 (meaning "no eclass"), the priority
262 must be 0 too. */
263 valid = False;
264 }
265
266 if (!valid)
267 return False;
268
269 fn_is_encoded = sym[10] == 'Z';
270
271 if (isWrap)
272 *isWrap = sym[3] == 'w';
273
274 if (eclassTag) {
275 *eclassTag = 1000 * ((Int)sym[4] - '0')
276 + 100 * ((Int)sym[5] - '0')
277 + 10 * ((Int)sym[6] - '0')
278 + 1 * ((Int)sym[7] - '0');
279 vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
280 }
281
282 if (eclassPrio) {
283 *eclassPrio = ((Int)sym[8]) - '0';
284 vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
285 }
286
287 /* Now check the soname prefix isn't "VG_Z_", as described in
288 pub_tool_redir.h. */
289 is_VG_Z_prefixed =
290 sym[12] == 'V' &&
291 sym[13] == 'G' &&
292 sym[14] == '_' &&
293 sym[15] == 'Z' &&
294 sym[16] == '_';
295 if (is_VG_Z_prefixed) {
296 vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
297 "see pub_tool_redir.h for an explanation.", sym);
298 }
299
300 /* Now scan the Z-encoded soname. */
301 i = 12;
302 while (True) {
303
304 if (sym[i] == '_')
305 /* Found the delimiter. Move on to the fnname loop. */
306 break;
307
308 if (sym[i] == 0) {
309 error = True;
310 goto out;
311 }
312
313 if (sym[i] != 'Z') {
314 EMITSO(sym[i]);
315 i++;
316 continue;
317 }
318
319 /* We've got a Z-escape. */
320 i++;
321 switch (sym[i]) {
322 case 'a': EMITSO('*'); break;
323 case 'c': EMITSO(':'); break;
324 case 'd': EMITSO('.'); break;
325 case 'h': EMITSO('-'); break;
326 case 'p': EMITSO('+'); break;
327 case 's': EMITSO(' '); break;
328 case 'u': EMITSO('_'); break;
329 case 'A': EMITSO('@'); break;
330 case 'D': EMITSO('$'); break;
331 case 'L': EMITSO('('); break;
332 case 'P': EMITSO('%'); break;
333 case 'R': EMITSO(')'); break;
334 case 'S': EMITSO('/'); break;
335 case 'Z': EMITSO('Z'); break;
336 default: error = True; goto out;
337 }
338 i++;
339 }
340
341 vg_assert(sym[i] == '_');
342 i++;
343
344 /* Now deal with the function name part. */
345 if (!fn_is_encoded) {
346
347 /* simple; just copy. */
348 while (True) {
349 if (sym[i] == 0)
350 break;
351 EMITFN(sym[i]);
352 i++;
353 }
354 goto out;
355
356 }
357
358 /* else use a Z-decoding loop like with soname */
359 while (True) {
360
361 if (sym[i] == 0)
362 break;
363
364 if (sym[i] != 'Z') {
365 EMITFN(sym[i]);
366 i++;
367 continue;
368 }
369
370 /* We've got a Z-escape. */
371 i++;
372 switch (sym[i]) {
373 case 'a': EMITFN('*'); break;
374 case 'c': EMITFN(':'); break;
375 case 'd': EMITFN('.'); break;
376 case 'h': EMITFN('-'); break;
377 case 'p': EMITFN('+'); break;
378 case 's': EMITFN(' '); break;
379 case 'u': EMITFN('_'); break;
380 case 'A': EMITFN('@'); break;
381 case 'D': EMITFN('$'); break;
382 case 'L': EMITFN('('); break;
383 case 'P': EMITFN('%'); break;
384 case 'R': EMITFN(')'); break;
385 case 'S': EMITFN('/'); break;
386 case 'Z': EMITFN('Z'); break;
387 default: error = True; goto out;
388 }
389 i++;
390 }
391
392 out:
393 EMITSO(0);
394 EMITFN(0);
395
396 if (error) {
397 /* Something's wrong. Give up. */
398 VG_(message)(Vg_UserMsg,
399 "m_demangle: error Z-demangling: %s\n", sym);
400 return False;
401 }
402
403 return True;
404 }
405
406
407 /*--------------------------------------------------------------------*/
408 /*--- end ---*/
409 /*--------------------------------------------------------------------*/
410