• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/profiling/memory/client.h"
18 
19 #include <signal.h>
20 #include <sys/prctl.h>
21 #include <sys/syscall.h>
22 #include <sys/types.h>
23 #include <unistd.h>
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cinttypes>
28 #include <new>
29 
30 #include <unwindstack/MachineArm.h>
31 #include <unwindstack/MachineArm64.h>
32 #include <unwindstack/MachineMips.h>
33 #include <unwindstack/MachineMips64.h>
34 #include <unwindstack/MachineX86.h>
35 #include <unwindstack/MachineX86_64.h>
36 #include <unwindstack/Regs.h>
37 #include <unwindstack/RegsGetLocal.h>
38 
39 #include "perfetto/base/compiler.h"
40 #include "perfetto/base/logging.h"
41 #include "perfetto/base/thread_utils.h"
42 #include "perfetto/base/time.h"
43 #include "perfetto/ext/base/file_utils.h"
44 #include "perfetto/ext/base/scoped_file.h"
45 #include "perfetto/ext/base/string_utils.h"
46 #include "perfetto/ext/base/unix_socket.h"
47 #include "perfetto/ext/base/utils.h"
48 #include "src/profiling/memory/sampler.h"
49 #include "src/profiling/memory/scoped_spinlock.h"
50 #include "src/profiling/memory/shared_ring_buffer.h"
51 #include "src/profiling/memory/wire_protocol.h"
52 
53 namespace perfetto {
54 namespace profiling {
55 namespace {
56 
// Payload for control-socket writes; the value is irrelevant, the single byte
// only serves as a notification to the service (see SendControlSocketByte).
const char kSingleByte[1] = {'x'};
// Sleep between retries when the shared ring buffer is full and the client is
// configured to block (see SendWireMessageWithRetriesIfBlocking).
constexpr auto kResendBackoffUs = 100;
59 
IsMainThread()60 inline bool IsMainThread() {
61   return getpid() == base::GetThreadId();
62 }
63 
// Clears the process' dumpable flag. The unused int parameter and the constant
// return value exist only to match the ScopedResource release signature.
int UnsetDumpable(int) {
  (void)prctl(PR_SET_DUMPABLE, 0);
  return 0;
}
68 
Contained(const StackRange & base,const char * ptr)69 bool Contained(const StackRange& base, const char* ptr) {
70   return (ptr >= base.begin && ptr < base.end);
71 }
72 
73 }  // namespace
74 
GetMaxTries(const ClientConfiguration & client_config)75 uint64_t GetMaxTries(const ClientConfiguration& client_config) {
76   if (!client_config.block_client)
77     return 1u;
78   if (client_config.block_client_timeout_us == 0)
79     return kInfiniteTries;
80   return std::max<uint64_t>(
81       1ul, client_config.block_client_timeout_us / kResendBackoffUs);
82 }
83 
GetThreadStackRange()84 StackRange GetThreadStackRange() {
85   // In glibc pthread_getattr_np can call realloc, even for a non-main-thread.
86   // This is fine, because the heapprofd wrapper for glibc prevents re-entering
87   // malloc.
88   pthread_attr_t attr;
89   if (pthread_getattr_np(pthread_self(), &attr) != 0)
90     return {nullptr, nullptr};
91   base::ScopedResource<pthread_attr_t*, pthread_attr_destroy, nullptr> cleanup(
92       &attr);
93 
94   char* stackaddr;
95   size_t stacksize;
96   if (pthread_attr_getstack(&attr, reinterpret_cast<void**>(&stackaddr),
97                             &stacksize) != 0)
98     return {nullptr, nullptr};
99   return {stackaddr, stackaddr + stacksize};
100 }
101 
GetSigAltStackRange()102 StackRange GetSigAltStackRange() {
103   stack_t altstack;
104 
105   if (sigaltstack(nullptr, &altstack) == -1) {
106     PERFETTO_PLOG("sigaltstack");
107     return {nullptr, nullptr};
108   }
109 
110   if ((altstack.ss_flags & SS_ONSTACK) == 0) {
111     return {nullptr, nullptr};
112   }
113 
114   return {static_cast<char*>(altstack.ss_sp),
115           static_cast<char*>(altstack.ss_sp) + altstack.ss_size};
116 }
117 
118 // The implementation of pthread_getattr_np for the main thread on bionic uses
119 // malloc, so we cannot use it in GetStackEnd, which we use inside of
120 // RecordMalloc (which is called from malloc). We would re-enter malloc if we
121 // used it.
122 //
123 // This is why we find the stack base for the main-thread when constructing
124 // the client and remember it.
GetMainThreadStackRange()125 StackRange GetMainThreadStackRange() {
126   base::ScopedFstream maps(fopen("/proc/self/maps", "re"));
127   if (!maps) {
128     return {nullptr, nullptr};
129   }
130   while (!feof(*maps)) {
131     char line[1024];
132     char* data = fgets(line, sizeof(line), *maps);
133     if (data != nullptr && strstr(data, "[stack]")) {
134       char* sep = strstr(data, "-");
135       if (sep == nullptr)
136         continue;
137 
138       char* min = reinterpret_cast<char*>(strtoll(data, nullptr, 16));
139       char* max = reinterpret_cast<char*>(strtoll(sep + 1, nullptr, 16));
140       return {min, max};
141     }
142   }
143   return {nullptr, nullptr};
144 }
145 
// static
// Opens a blocking stream unix socket, connects it to |sock_name| and applies
// send/receive timeouts (kClientSockTimeoutMs) so the handshake cannot hang
// indefinitely. Returns std::nullopt on any failure.
std::optional<base::UnixSocketRaw> Client::ConnectToHeapprofd(
    const std::string& sock_name) {
  auto sock = base::UnixSocketRaw::CreateMayFail(base::SockFamily::kUnix,
                                                 base::SockType::kStream);
  if (!sock || !sock.Connect(sock_name)) {
    PERFETTO_PLOG("Failed to connect to %s", sock_name.c_str());
    return std::nullopt;
  }
  if (!sock.SetTxTimeout(kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set send timeout for %s", sock_name.c_str());
    return std::nullopt;
  }
  if (!sock.SetRxTimeout(kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set receive timeout for %s", sock_name.c_str());
    return std::nullopt;
  }
  return std::move(sock);
}
165 
// static
// Performs the handshake with heapprofd over the already-connected blocking
// |sock|:
//  1. Sends fds for /proc/self/maps and /proc/self/mem, temporarily making
//     the process dumpable if necessary so those files can be opened.
//  2. Receives the ClientConfiguration, with the shared-memory fd attached
//     to one of the reads.
//  3. Attaches the shared ring buffer and constructs the Client through
//     |unhooked_allocator| (which bypasses the malloc hooks).
// Returns nullptr on any failure.
std::shared_ptr<Client> Client::CreateAndHandshake(
    base::UnixSocketRaw sock,
    UnhookedAllocator<Client> unhooked_allocator) {
  if (!sock) {
    PERFETTO_DFATAL_OR_ELOG("Socket not connected.");
    return nullptr;
  }

  sock.DcheckIsBlocking(true);

  // We might be running in a process that is not dumpable (such as app
  // processes on user builds), in which case the /proc/self/mem will be chown'd
  // to root:root, and will not be accessible even to the process itself (see
  // man 5 proc). In such situations, temporarily mark the process dumpable to
  // be able to open the files, unsetting dumpability immediately afterwards.
  int orig_dumpable = prctl(PR_GET_DUMPABLE);

  enum { kNop, kDoUnset };
  base::ScopedResource<int, UnsetDumpable, kNop, false> unset_dumpable(kNop);
  if (orig_dumpable == 0) {
    unset_dumpable.reset(kDoUnset);
    prctl(PR_SET_DUMPABLE, 1);
  }

  base::ScopedFile maps(base::OpenFile("/proc/self/maps", O_RDONLY));
  if (!maps) {
    PERFETTO_DFATAL_OR_ELOG("Failed to open /proc/self/maps");
    return nullptr;
  }
  base::ScopedFile mem(base::OpenFile("/proc/self/mem", O_RDONLY));
  if (!mem) {
    PERFETTO_DFATAL_OR_ELOG("Failed to open /proc/self/mem");
    return nullptr;
  }

  // Restore original dumpability value if we overrode it.
  unset_dumpable.reset();

  int fds[kHandshakeSize];
  fds[kHandshakeMaps] = *maps;
  fds[kHandshakeMem] = *mem;

  // Send an empty record to transfer fds for /proc/self/maps and
  // /proc/self/mem.
  if (sock.Send(kSingleByte, sizeof(kSingleByte), fds, kHandshakeSize) !=
      sizeof(kSingleByte)) {
    PERFETTO_DFATAL_OR_ELOG("Failed to send file descriptors.");
    return nullptr;
  }

  // The ClientConfiguration may arrive split across several reads. Only ask
  // for an fd until the shmem fd has been received.
  ClientConfiguration client_config;
  base::ScopedFile shmem_fd;
  size_t recv = 0;
  while (recv < sizeof(client_config)) {
    size_t num_fds = 0;
    base::ScopedFile* fd = nullptr;
    if (!shmem_fd) {
      num_fds = 1;
      fd = &shmem_fd;
    }
    ssize_t rd = sock.Receive(reinterpret_cast<char*>(&client_config) + recv,
                              sizeof(client_config) - recv, fd, num_fds);
    if (rd == -1) {
      PERFETTO_PLOG("Failed to receive ClientConfiguration.");
      return nullptr;
    }
    if (rd == 0) {
      PERFETTO_LOG("Server disconnected while sending ClientConfiguration.");
      return nullptr;
    }
    recv += static_cast<size_t>(rd);
  }

  if (!shmem_fd) {
    PERFETTO_DFATAL_OR_ELOG("Did not receive shmem fd.");
    return nullptr;
  }

  auto shmem = SharedRingBuffer::Attach(std::move(shmem_fd));
  if (!shmem || !shmem->is_valid()) {
    PERFETTO_DFATAL_OR_ELOG("Failed to attach to shmem.");
    return nullptr;
  }

  // Switch to non-blocking: IsConnected() relies on a non-blocking socket
  // (it DCHECKs that).
  sock.SetBlocking(false);
  // note: the shared_ptr will retain a copy of the unhooked_allocator
  return std::allocate_shared<Client>(unhooked_allocator, std::move(sock),
                                      client_config, std::move(shmem.value()),
                                      getpid(), GetMainThreadStackRange());
}
257 
// Takes ownership of the (by now non-blocking) control socket and the
// attached shared ring buffer. |main_thread_stack_range| is captured at
// construction time because it cannot be computed safely from inside the
// malloc hooks (see GetMainThreadStackRange).
// NOTE(review): max_shmem_tries_ is initialized from client_config_; member
// initialization follows declaration order in client.h, so this assumes
// client_config_ is declared before max_shmem_tries_ — TODO confirm.
Client::Client(base::UnixSocketRaw sock,
               ClientConfiguration client_config,
               SharedRingBuffer shmem,
               pid_t pid_at_creation,
               StackRange main_thread_stack_range)
    : client_config_(client_config),
      max_shmem_tries_(GetMaxTries(client_config_)),
      sock_(std::move(sock)),
      main_thread_stack_range_(main_thread_stack_range),
      shmem_(std::move(shmem)),
      pid_at_creation_(pid_at_creation) {}
269 
// Releases the control socket fd and closes it manually, deliberately
// ignoring close() failures (see below for why ScopedResource's
// crash-on-close-failure behavior is unwanted here).
Client::~Client() {
  // This is work-around for code like the following:
  // https://android.googlesource.com/platform/libcore/+/4ecb71f94378716f88703b9f7548b5d24839262f/ojluni/src/main/native/UNIXProcess_md.c#427
  // They fork, close all fds by iterating over /proc/self/fd using opendir.
  // Unfortunately closedir calls free, which detects the fork, and then tries
  // to destruct this Client.
  //
  // ScopedResource crashes on failure to close, so we explicitly ignore
  // failures here.
  int fd = sock_.ReleaseFd().release();
  if (fd != -1)
    close(fd);
}
283 
GetStackEnd(const char * stackptr)284 const char* Client::GetStackEnd(const char* stackptr) {
285   StackRange thread_stack_range;
286   bool is_main_thread = IsMainThread();
287   if (is_main_thread) {
288     thread_stack_range = main_thread_stack_range_;
289   } else {
290     thread_stack_range = GetThreadStackRange();
291   }
292   if (Contained(thread_stack_range, stackptr)) {
293     return thread_stack_range.end;
294   }
295   StackRange sigalt_stack_range = GetSigAltStackRange();
296   if (Contained(sigalt_stack_range, stackptr)) {
297     return sigalt_stack_range.end;
298   }
299   // The main thread might have expanded since we read its bounds. We now know
300   // it is not the sigaltstack, so it has to be the main stack.
301   // TODO(fmayer): We should reparse maps here, because now we will keep
302   //               hitting the slow-path that calls the sigaltstack syscall.
303   if (is_main_thread && stackptr < thread_stack_range.end) {
304     return thread_stack_range.end;
305   }
306   return nullptr;
307 }
308 
// Best-effort detection of whether we're continuing work in a forked child of
// the profiled process, in which case we want to stop. Note that due to
// malloc_hooks.cc's atfork handler, the proper fork calls should leak the child
// before reaching this point. Therefore this logic exists primarily to handle
// clone and vfork.
// TODO(rsavitski): rename/delete |disable_fork_teardown| config option if this
// logic sticks, as the option becomes more clone-specific, and quite narrow.
//
// Returns true iff the current pid differs from the pid the Client was
// created in. As a side effect of the first detection, sets
// postfork_return_value_ (the value record/free entry points should return
// from then on) and logs the decision once.
bool Client::IsPostFork() {
  if (PERFETTO_UNLIKELY(getpid() != pid_at_creation_)) {
    // Only print the message once, even if we do not shut down the client.
    if (!detected_fork_) {
      detected_fork_ = true;
      const char* vfork_detected = "";

      // We use the fact that vfork does not update Bionic's TID cache, so
      // we will have a mismatch between the actual TID (from the syscall)
      // and the cached one.
      //
      // What we really want to check is if we are sharing virtual memory space
      // with the original process. This would be
      // syscall(__NR_kcmp, syscall(__NR_getpid), pid_at_creation_,
      //         KCMP_VM, 0, 0),
      //  but that is not compiled into our kernels and disallowed by seccomp.
      if (!client_config_.disable_vfork_detection &&
          syscall(__NR_gettid) != base::GetThreadId()) {
        // vfork child: keep the client alive; tearing it down would also
        // affect the (suspended) parent sharing our address space.
        postfork_return_value_ = true;
        vfork_detected = " (vfork detected)";
      } else {
        postfork_return_value_ = client_config_.disable_fork_teardown;
      }
      const char* action =
          postfork_return_value_ ? "Not shutting down" : "Shutting down";
      const char* force =
          postfork_return_value_ ? " (fork teardown disabled)" : "";
      PERFETTO_LOG(
          "Detected post-fork child situation. Not profiling the child. "
          "%s client%s%s",
          action, force, vfork_detected);
    }
    return true;
  }
  return false;
}
352 
// The stack grows towards numerically smaller addresses, so the stack layout
// of main calling malloc is as follows.
//
//               +------------+
//               |SendWireMsg |
// stackptr +--> +------------+ 0x1000
//               |RecordMalloc|    +
//               +------------+    |
//               | malloc     |    |
//               +------------+    |
//               |  main      |    v
// stackend  +-> +------------+ 0xffff
//
// Records a sampled allocation: captures the CPU registers and the raw stack
// [stackptr, stackend) and sends them to the service for remote unwinding.
// Returns false if the client should be torn down.
bool Client::RecordMalloc(uint32_t heap_id,
                          uint64_t sample_size,
                          uint64_t alloc_size,
                          uint64_t alloc_address) {
  if (PERFETTO_UNLIKELY(IsPostFork())) {
    return postfork_return_value_;
  }

  AllocMetadata metadata;
  // Current frame address is the top of the stack snapshot; registers are
  // captured here so the service can start unwinding from this point.
  const char* stackptr = reinterpret_cast<char*>(__builtin_frame_address(0));
  unwindstack::AsmGetRegs(metadata.register_data);
  const char* stackend = GetStackEnd(stackptr);
  if (!stackend) {
    PERFETTO_ELOG("Failed to find stackend.");
    shmem_.SetErrorState(SharedRingBuffer::kInvalidStackBounds);
    return false;
  }
  uint64_t stack_size = static_cast<uint64_t>(stackend - stackptr);
  metadata.sample_size = sample_size;
  metadata.alloc_size = alloc_size;
  metadata.alloc_address = alloc_address;
  metadata.stack_pointer = reinterpret_cast<uint64_t>(stackptr);
  metadata.arch = unwindstack::Regs::CurrentArch();
  // Per-heap sequence number lets the service order allocs/frees.
  metadata.sequence_number =
      1 + sequence_number_[heap_id].fetch_add(1, std::memory_order_acq_rel);
  metadata.heap_id = heap_id;

  // Coarse timestamp is sufficient and cheaper than CLOCK_MONOTONIC; 0 on
  // failure.
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
    metadata.clock_monotonic_coarse_timestamp =
        static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
  } else {
    metadata.clock_monotonic_coarse_timestamp = 0;
  }

  WireMessage msg{};
  msg.record_type = RecordType::Malloc;
  msg.alloc_header = &metadata;
  msg.payload = const_cast<char*>(stackptr);
  msg.payload_size = static_cast<size_t>(stack_size);

  if (SendWireMessageWithRetriesIfBlocking(msg) == -1)
    return false;

  // Notify the service over the control socket only if the reader was paused.
  if (!shmem_.GetAndResetReaderPaused())
    return true;
  return SendControlSocketByte();
}
413 
SendWireMessageWithRetriesIfBlocking(const WireMessage & msg)414 int64_t Client::SendWireMessageWithRetriesIfBlocking(const WireMessage& msg) {
415   for (uint64_t i = 0;
416        max_shmem_tries_ == kInfiniteTries || i < max_shmem_tries_; ++i) {
417     if (shmem_.shutting_down())
418       return -1;
419     int64_t res = SendWireMessage(&shmem_, msg);
420     if (PERFETTO_LIKELY(res >= 0))
421       return res;
422     // retry if in blocking mode and still connected
423     if (client_config_.block_client && base::IsAgain(errno) && IsConnected()) {
424       usleep(kResendBackoffUs);
425     } else {
426       break;
427     }
428   }
429   if (IsConnected())
430     shmem_.SetErrorState(SharedRingBuffer::kHitTimeout);
431   PERFETTO_PLOG("Failed to write to shared ring buffer. Disconnecting.");
432   return -1;
433 }
434 
RecordFree(uint32_t heap_id,const uint64_t alloc_address)435 bool Client::RecordFree(uint32_t heap_id, const uint64_t alloc_address) {
436   if (PERFETTO_UNLIKELY(IsPostFork())) {
437     return postfork_return_value_;
438   }
439 
440   FreeEntry current_entry;
441   current_entry.sequence_number =
442       1 + sequence_number_[heap_id].fetch_add(1, std::memory_order_acq_rel);
443   current_entry.addr = alloc_address;
444   current_entry.heap_id = heap_id;
445   WireMessage msg = {};
446   msg.record_type = RecordType::Free;
447   msg.free_header = &current_entry;
448   // Do not send control socket byte, as frees are very cheap to handle, so we
449   // just delay to the next alloc. Sending the control socket byte is ~10x the
450   // rest of the client overhead.
451   int64_t bytes_free = SendWireMessageWithRetriesIfBlocking(msg);
452   if (bytes_free == -1)
453     return false;
454   // Seems like we are filling up the shmem with frees. Flush.
455   if (static_cast<uint64_t>(bytes_free) < shmem_.size() / 2 &&
456       shmem_.GetAndResetReaderPaused()) {
457     return SendControlSocketByte();
458   }
459   return true;
460 }
461 
RecordHeapInfo(uint32_t heap_id,const char * heap_name,uint64_t interval)462 bool Client::RecordHeapInfo(uint32_t heap_id,
463                             const char* heap_name,
464                             uint64_t interval) {
465   if (PERFETTO_UNLIKELY(IsPostFork())) {
466     return postfork_return_value_;
467   }
468 
469   HeapName hnr;
470   hnr.heap_id = heap_id;
471   base::StringCopy(&hnr.heap_name[0], heap_name, sizeof(hnr.heap_name));
472   hnr.sample_interval = interval;
473 
474   WireMessage msg = {};
475   msg.record_type = RecordType::HeapName;
476   msg.heap_name_header = &hnr;
477   return SendWireMessageWithRetriesIfBlocking(msg);
478 }
479 
IsConnected()480 bool Client::IsConnected() {
481   sock_.DcheckIsBlocking(false);
482   char buf[1];
483   ssize_t recv_bytes = sock_.Receive(buf, sizeof(buf), nullptr, 0);
484   if (recv_bytes == 0)
485     return false;
486   // This is not supposed to happen because currently heapprofd does not send
487   // data to the client. Here for generality's sake.
488   if (recv_bytes > 0)
489     return true;
490   return base::IsAgain(errno);
491 }
492 
SendControlSocketByte()493 bool Client::SendControlSocketByte() {
494   // If base::IsAgain(errno), the socket buffer is full, so the service will
495   // pick up the notification even without adding another byte.
496   // In other error cases (usually EPIPE) we want to disconnect, because that
497   // is how the service signals the tracing session was torn down.
498   if (sock_.Send(kSingleByte, sizeof(kSingleByte)) == -1 &&
499       !base::IsAgain(errno)) {
500     if (shmem_.shutting_down()) {
501       PERFETTO_LOG("Profiling session ended.");
502     } else {
503       PERFETTO_PLOG("Failed to send control socket byte.");
504     }
505     return false;
506   }
507   return true;
508 }
509 
510 }  // namespace profiling
511 }  // namespace perfetto
512