1 // Copyright (c) 2009, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // ---
31 // Author: Craig Silverstein
32 //
33 // This forks out to pprof to do the actual symbolizing. We might
34 // be better off writing our own in C++.
35
36 #include "config.h"
37 #include "symbolize.h"
38 #include <stdlib.h>
39 #ifdef HAVE_UNISTD_H
40 #include <unistd.h> // for write()
41 #endif
42 #ifdef HAVE_SYS_SOCKET_H
43 #include <sys/socket.h> // for socketpair() -- needed by Symbolize
44 #endif
45 #ifdef HAVE_SYS_WAIT_H
46 #include <sys/wait.h> // for wait() -- needed by Symbolize
47 #endif
48 #ifdef HAVE_POLL_H
49 #include <poll.h>
50 #endif
51 #ifdef __MACH__
52 #include <mach-o/dyld.h> // for GetProgramInvocationName()
53 #include <limits.h> // for PATH_MAX
54 #endif
55 #if defined(__CYGWIN__) || defined(__CYGWIN32__)
56 #include <io.h> // for get_osfhandle()
57 #endif
58 #include <string>
59 #include "base/commandlineflags.h"
60 #include "base/logging.h"
61 #include "base/sysinfo.h"
62
63 using std::string;
64 using tcmalloc::DumpProcSelfMaps; // from sysinfo.h
65
66
// Flag naming the pprof program that SymbolTable::Symbolize() checks with
// access() and then runs via execlp().  The default comes from
// EnvToString("PPROF_PATH", "pprof") -- presumably the PPROF_PATH environment
// variable with "pprof" as the fallback; confirm against EnvToString.
67 DEFINE_string(symbolize_pprof,
68 EnvToString("PPROF_PATH", "pprof"),
69 "Path to pprof to call for reporting function names.");
70
// Returns the name/path this program was invoked as, or NULL when the
// platform gives us no way to find it out (or the lookup fails).
// The Mach-O branch caches its answer in a function-local static buffer,
// which is acceptable because this is not expected to be called from
// multiple threads.
static char* GetProgramInvocationName() {
#if defined(HAVE_PROGRAM_INVOCATION_NAME)
  // The C library exports the invocation name directly.
  extern char* program_invocation_name;
  char* const libc_name = program_invocation_name;
  return libc_name;
#elif defined(__MACH__)
  // Use fixed static storage rather than the heap: this may run while
  // the heap is corrupted.
  static char cached_path[PATH_MAX];
  if (cached_path[0] != '\0') {
    return cached_path;  // already computed by an earlier call
  }
  uint32_t capacity = sizeof(cached_path);
  if (_NSGetExecutablePath(cached_path, &capacity) != 0) {
    return NULL;
  }
  return cached_path;
#else
  // No known way to recover argv[0] on this platform.
  return NULL;
#endif
}
91
92 // Prints an error message when you can't run Symbolize().
PrintError(const char * reason)93 static void PrintError(const char* reason) {
94 RAW_LOG(ERROR,
95 "*** WARNING: Cannot convert addresses to symbols in output below.\n"
96 "*** Reason: %s\n"
97 "*** If you cannot fix this, try running pprof directly.\n",
98 reason);
99 }
100
Add(const void * addr)101 void SymbolTable::Add(const void* addr) {
102 symbolization_table_[addr] = "";
103 }
104
GetSymbol(const void * addr)105 const char* SymbolTable::GetSymbol(const void* addr) {
106 return symbolization_table_[addr];
107 }
108
109 // Updates symbolization_table with the pointers to symbol names corresponding
110 // to its keys. The symbol names are stored in out, which is allocated and
111 // freed by the caller of this routine.
112 // Note that the forking/etc is not thread-safe or re-entrant. That's
113 // ok for the purpose we need -- reporting leaks detected by heap-checker
114 // -- but be careful if you decide to use this routine for other purposes.
115 // Returns number of symbols read on error. If can't symbolize, returns 0
116 // and emits an error message about why.
Symbolize()117 int SymbolTable::Symbolize() {
118 #if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
119 PrintError("Perftools does not know how to call a sub-process on this O/S");
120 return 0;
121 #else
122 const char* argv0 = GetProgramInvocationName();
123 if (argv0 == NULL) { // can't call symbolize if we can't figure out our name
124 PrintError("Cannot figure out the name of this executable (argv0)");
125 return 0;
126 }
127 if (access(FLAGS_symbolize_pprof, R_OK) != 0) {
128 PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
129 return 0;
130 }
131
132 // All this work is to do two-way communication. ugh.
133 int *child_in = NULL; // file descriptors
134 int *child_out = NULL; // for now, we don't worry about child_err
135 int child_fds[5][2]; // socketpair may be called up to five times below
136
137 // The client program may close its stdin and/or stdout and/or stderr
138 // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
139 // In this case the communication between the forked processes may be broken
140 // if either the parent or the child tries to close or duplicate these
141 // descriptors. The loop below produces two pairs of file descriptors, each
142 // greater than 2 (stderr).
143 for (int i = 0; i < 5; i++) {
144 if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
145 for (int j = 0; j < i; j++) {
146 close(child_fds[j][0]);
147 close(child_fds[j][1]);
148 PrintError("Cannot create a socket pair");
149 return 0;
150 }
151 } else {
152 if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
153 if (child_in == NULL) {
154 child_in = child_fds[i];
155 } else {
156 child_out = child_fds[i];
157 for (int j = 0; j < i; j++) {
158 if (child_fds[j] == child_in) continue;
159 close(child_fds[j][0]);
160 close(child_fds[j][1]);
161 }
162 break;
163 }
164 }
165 }
166 }
167
168 switch (fork()) {
169 case -1: { // error
170 close(child_in[0]);
171 close(child_in[1]);
172 close(child_out[0]);
173 close(child_out[1]);
174 PrintError("Unknown error calling fork()");
175 return 0;
176 }
177 case 0: { // child
178 close(child_in[1]); // child uses the 0's, parent uses the 1's
179 close(child_out[1]); // child uses the 0's, parent uses the 1's
180 close(0);
181 close(1);
182 if (dup2(child_in[0], 0) == -1) _exit(1);
183 if (dup2(child_out[0], 1) == -1) _exit(2);
184 // Unset vars that might cause trouble when we fork
185 unsetenv("CPUPROFILE");
186 unsetenv("HEAPPROFILE");
187 unsetenv("HEAPCHECK");
188 unsetenv("PERFTOOLS_VERBOSE");
189 execlp(FLAGS_symbolize_pprof, FLAGS_symbolize_pprof,
190 "--symbols", argv0, NULL);
191 _exit(3); // if execvp fails, it's bad news for us
192 }
193 default: { // parent
194 close(child_in[0]); // child uses the 0's, parent uses the 1's
195 close(child_out[0]); // child uses the 0's, parent uses the 1's
196 #ifdef HAVE_POLL_H
197 // Waiting for 1ms seems to give the OS time to notice any errors.
198 poll(0, 0, 1);
199 // For maximum safety, we check to make sure the execlp
200 // succeeded before trying to write. (Otherwise we'll get a
201 // SIGPIPE.) For systems without poll.h, we'll just skip this
202 // check, and trust that the user set PPROF_PATH correctly!
203 struct pollfd pfd = { child_in[1], POLLOUT, 0 };
204 if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
205 (pfd.revents & (POLLHUP|POLLERR))) {
206 PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
207 return 0;
208 }
209 #endif
210 #if defined(__CYGWIN__) || defined(__CYGWIN32__)
211 // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert.
212 const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]);
213 DumpProcSelfMaps(symbols_handle);
214 #else
215 DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin
216 #endif
217
218 // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each
219 // address to feed to pprof.
220 const int kOutBufSize = 24 * symbolization_table_.size();
221 char *pprof_buffer = new char[kOutBufSize];
222 int written = 0;
223 for (SymbolMap::const_iterator iter = symbolization_table_.begin();
224 iter != symbolization_table_.end(); ++iter) {
225 written += snprintf(pprof_buffer + written, kOutBufSize - written,
226 // pprof expects format to be 0xXXXXXX
227 "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first));
228 }
229 write(child_in[1], pprof_buffer, strlen(pprof_buffer));
230 close(child_in[1]); // that's all we need to write
231
232 const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size();
233 int total_bytes_read = 0;
234 delete[] symbol_buffer_;
235 symbol_buffer_ = new char[kSymbolBufferSize];
236 memset(symbol_buffer_, '\0', kSymbolBufferSize);
237 while (1) {
238 int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read,
239 kSymbolBufferSize - total_bytes_read);
240 if (bytes_read < 0) {
241 close(child_out[1]);
242 PrintError("Cannot read data from pprof");
243 return 0;
244 } else if (bytes_read == 0) {
245 close(child_out[1]);
246 wait(NULL);
247 break;
248 } else {
249 total_bytes_read += bytes_read;
250 }
251 }
252 // We have successfully read the output of pprof into out. Make sure
253 // the last symbol is full (we can tell because it ends with a \n).
254 if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n')
255 return 0;
256 // make the symbolization_table_ values point to the output vector
257 SymbolMap::iterator fill = symbolization_table_.begin();
258 int num_symbols = 0;
259 const char *current_name = symbol_buffer_;
260 for (int i = 0; i < total_bytes_read; i++) {
261 if (symbol_buffer_[i] == '\n') {
262 fill->second = current_name;
263 symbol_buffer_[i] = '\0';
264 current_name = symbol_buffer_ + i + 1;
265 fill++;
266 num_symbols++;
267 }
268 }
269 return num_symbols;
270 }
271 }
272 PrintError("Unkown error (should never occur!)");
273 return 0; // shouldn't be reachable
274 #endif
275 }
276