/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/profiling/memory/client.h"

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <unwindstack/MachineArm.h>
#include <unwindstack/MachineArm64.h>
#include <unwindstack/MachineMips.h>
#include <unwindstack/MachineMips64.h>
#include <unwindstack/MachineX86.h>
#include <unwindstack/MachineX86_64.h>
#include <unwindstack/Regs.h>
#include <unwindstack/RegsGetLocal.h>

#include <algorithm>
#include <atomic>
#include <chrono>
#include <mutex>
#include <new>

#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/unix_socket.h"
#include "perfetto/ext/base/utils.h"
#include "src/profiling/memory/sampler.h"
#include "src/profiling/memory/scoped_spinlock.h"
#include "src/profiling/memory/wire_protocol.h"

namespace perfetto {
namespace profiling {
namespace {

// Single-byte payload used for the fd-transfer handshake and for
// control-socket notifications; only its arrival matters, not its content.
const char kSingleByte[1] = {'x'};
// How long RecordFree waits to acquire the free-batch lock before giving up.
constexpr std::chrono::seconds kLockTimeout{1};
// Microseconds to sleep between retries of a blocked shared-memory write.
constexpr auto kResendBackoffUs = 100;

// On Linux, the main thread's thread id equals the process id.
inline bool IsMainThread() {
  return getpid() == base::GetThreadId();
}

// The implementation of pthread_getattr_np for the main thread uses malloc,
// so we cannot use it in GetStackBase, which we use inside of RecordMalloc
// (which is called from malloc). We would re-enter malloc if we used it.
//
// This is why we find the stack base for the main-thread when constructing
// the client and remember it.
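//
// Illustrative example (addresses made up): given the /proc/self/maps line
//   7ffc4a2d9000-7ffc4a2fa000 rw-p 00000000 00:00 0          [stack]
// this function returns the mapping's end address, 0x7ffc4a2fa000, which is
// the base of the downwards-growing stack.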
char* FindMainThreadStack() {
  base::ScopedFstream maps(fopen("/proc/self/maps", "r"));
  if (!maps) {
    return nullptr;
  }
  while (!feof(*maps)) {
    char line[1024];
    char* data = fgets(line, sizeof(line), *maps);
    if (data != nullptr && strstr(data, "[stack]")) {
      char* sep = strstr(data, "-");
      if (sep == nullptr)
        continue;
      sep++;
      return reinterpret_cast<char*>(strtoll(sep, nullptr, 16));
    }
  }
  return nullptr;
}

// ScopedResource releaser that clears the dumpable flag again.
int UnsetDumpable(int) {
  prctl(PR_SET_DUMPABLE, 0);
  return 0;
}

}  // namespace

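// Computes how many times a blocked write into the shared memory buffer is
// attempted before giving up. Illustrative arithmetic: with
// block_client_timeout_us = 25000 and kResendBackoffUs = 100, the client
// retries up to 250 times (roughly 25ms); a timeout of 0 in blocking mode
// means infinite tries.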
uint64_t GetMaxTries(const ClientConfiguration& client_config) {
  if (!client_config.block_client)
    return 1u;
  if (client_config.block_client_timeout_us == 0)
    return kInfiniteTries;
  return std::max<uint64_t>(
      1ul, client_config.block_client_timeout_us / kResendBackoffUs);
}

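// pthread_attr_getstack reports the lowest address and the size of the stack
// mapping; since the stack grows towards lower addresses, the stack base
// (its highest address) is stackaddr + stacksize.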
const char* GetThreadStackBase() {
  pthread_attr_t attr;
  if (pthread_getattr_np(pthread_self(), &attr) != 0)
    return nullptr;
  base::ScopedResource<pthread_attr_t*, pthread_attr_destroy, nullptr> cleanup(
      &attr);

  char* stackaddr;
  size_t stacksize;
  if (pthread_attr_getstack(&attr, reinterpret_cast<void**>(&stackaddr),
                            &stacksize) != 0)
    return nullptr;
  return stackaddr + stacksize;
}

// static
base::Optional<base::UnixSocketRaw> Client::ConnectToHeapprofd(
    const std::string& sock_name) {
  auto sock = base::UnixSocketRaw::CreateMayFail(base::SockFamily::kUnix,
                                                 base::SockType::kStream);
  if (!sock || !sock.Connect(sock_name)) {
    PERFETTO_PLOG("Failed to connect to %s", sock_name.c_str());
    return base::nullopt;
  }
  if (!sock.SetTxTimeout(kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set send timeout for %s", sock_name.c_str());
    return base::nullopt;
  }
  if (!sock.SetRxTimeout(kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set receive timeout for %s", sock_name.c_str());
    return base::nullopt;
  }
  return std::move(sock);
}

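// Performs the startup handshake with heapprofd over |sock|: sends the
// process' maps and mem fds (plus page_idle when available), then receives
// the ClientConfiguration together with the shared memory ring buffer's fd.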
// static
std::shared_ptr<Client> Client::CreateAndHandshake(
    base::UnixSocketRaw sock,
    UnhookedAllocator<Client> unhooked_allocator) {
  if (!sock) {
    PERFETTO_DFATAL_OR_ELOG("Socket not connected.");
    return nullptr;
  }

  PERFETTO_DCHECK(sock.IsBlocking());

  // We might be running in a process that is not dumpable (such as app
  // processes on user builds), in which case /proc/self/mem will be chown'd
  // to root:root and will not be accessible even to the process itself (see
  // man 5 proc). In such situations, temporarily mark the process dumpable to
  // be able to open the files, unsetting dumpability immediately afterwards.
  int orig_dumpable = prctl(PR_GET_DUMPABLE);

  enum { kNop, kDoUnset };
  base::ScopedResource<int, UnsetDumpable, kNop, false> unset_dumpable(kNop);
  if (orig_dumpable == 0) {
    unset_dumpable.reset(kDoUnset);
    prctl(PR_SET_DUMPABLE, 1);
  }

  size_t num_send_fds = kHandshakeSize;

  base::ScopedFile maps(base::OpenFile("/proc/self/maps", O_RDONLY));
  if (!maps) {
    PERFETTO_DFATAL_OR_ELOG("Failed to open /proc/self/maps");
    return nullptr;
  }
  base::ScopedFile mem(base::OpenFile("/proc/self/mem", O_RDONLY));
  if (!mem) {
    PERFETTO_DFATAL_OR_ELOG("Failed to open /proc/self/mem");
    return nullptr;
  }

  base::ScopedFile page_idle(base::OpenFile("/proc/self/page_idle", O_RDWR));
  if (!page_idle) {
    PERFETTO_DLOG("Failed to open /proc/self/page_idle. Continuing.");
    num_send_fds = kHandshakeSize - 1;
  }

  // Restore the original dumpability value if we overrode it.
  unset_dumpable.reset();

  int fds[kHandshakeSize];
  fds[kHandshakeMaps] = *maps;
  fds[kHandshakeMem] = *mem;
  fds[kHandshakePageIdle] = *page_idle;

  // Send a single placeholder byte whose only purpose is to transfer the
  // file descriptors to the service.
  if (sock.Send(kSingleByte, sizeof(kSingleByte), fds, num_send_fds) !=
      sizeof(kSingleByte)) {
    PERFETTO_DFATAL_OR_ELOG("Failed to send file descriptors.");
    return nullptr;
  }

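  // Receive the ClientConfiguration. It may arrive split across multiple
  // reads, and the shared memory buffer's fd is attached to one of them, so
  // keep reading until the struct is complete.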
  ClientConfiguration client_config;
  base::ScopedFile shmem_fd;
  size_t recv = 0;
  while (recv < sizeof(client_config)) {
    size_t num_fds = 0;
    base::ScopedFile* fd = nullptr;
    if (!shmem_fd) {
      num_fds = 1;
      fd = &shmem_fd;
    }
    ssize_t rd = sock.Receive(reinterpret_cast<char*>(&client_config) + recv,
                              sizeof(client_config) - recv, fd, num_fds);
    if (rd == -1) {
      PERFETTO_PLOG("Failed to receive ClientConfiguration.");
      return nullptr;
    }
    if (rd == 0) {
      PERFETTO_LOG("Server disconnected while sending ClientConfiguration.");
      return nullptr;
    }
    recv += static_cast<size_t>(rd);
  }

  if (!shmem_fd) {
    PERFETTO_DFATAL_OR_ELOG("Did not receive shmem fd.");
    return nullptr;
  }

  auto shmem = SharedRingBuffer::Attach(std::move(shmem_fd));
  if (!shmem || !shmem->is_valid()) {
    PERFETTO_DFATAL_OR_ELOG("Failed to attach to shmem.");
    return nullptr;
  }

  PERFETTO_DCHECK(client_config.interval >= 1);
  sock.SetBlocking(false);
  Sampler sampler{client_config.interval};
  // Note: the shared_ptr will retain a copy of the unhooked_allocator.
  return std::allocate_shared<Client>(unhooked_allocator, std::move(sock),
                                      client_config, std::move(shmem.value()),
                                      std::move(sampler), getpid(),
                                      FindMainThreadStack());
}

Client::Client(base::UnixSocketRaw sock,
               ClientConfiguration client_config,
               SharedRingBuffer shmem,
               Sampler sampler,
               pid_t pid_at_creation,
               const char* main_thread_stack_base)
    : client_config_(client_config),
      max_shmem_tries_(GetMaxTries(client_config_)),
      sampler_(std::move(sampler)),
      sock_(std::move(sock)),
      main_thread_stack_base_(main_thread_stack_base),
      shmem_(std::move(shmem)),
      pid_at_creation_(pid_at_creation) {}

Client::~Client() {
  // This is a work-around for code like the following:
  // https://android.googlesource.com/platform/libcore/+/4ecb71f94378716f88703b9f7548b5d24839262f/ojluni/src/main/native/UNIXProcess_md.c#427
  // That code forks, then closes all fds by iterating over /proc/self/fd
  // using opendir. Unfortunately, closedir calls free, which detects the
  // fork and then tries to destruct this Client.
  //
  // ScopedResource crashes on failure to close, so we explicitly ignore
  // failures here.
  int fd = sock_.ReleaseFd().release();
  if (fd != -1)
    close(fd);
}

const char* Client::GetStackBase() {
  if (IsMainThread()) {
    if (!main_thread_stack_base_)
      // Because pthread_getattr_np reads and parses /proc/self/maps and
      // /proc/self/stat, we have to cache the result here.
      main_thread_stack_base_ = GetThreadStackBase();
    return main_thread_stack_base_;
  }
  return GetThreadStackBase();
}

// Best-effort detection of whether we're continuing work in a forked child of
// the profiled process, in which case we want to stop. Note that, due to
// malloc_hooks.cc's atfork handler, proper fork calls should leak the child
// before reaching this point. Therefore this logic exists primarily to handle
// clone and vfork.
// TODO(rsavitski): rename/delete |disable_fork_teardown| config option if this
// logic sticks, as the option becomes more clone-specific, and quite narrow.
bool Client::IsPostFork() {
  if (PERFETTO_UNLIKELY(getpid() != pid_at_creation_)) {
    // Only print the message once, even if we do not shut down the client.
    if (!detected_fork_) {
      detected_fork_ = true;
      const char* vfork_detected = "";

      // We use the fact that vfork does not update Bionic's TID cache, so
      // we will have a mismatch between the actual TID (from the syscall)
      // and the cached one.
      //
      // What we really want to check is whether we share the virtual memory
      // space with the original process. That would be
      // syscall(__NR_kcmp, syscall(__NR_getpid), pid_at_creation_,
      //         KCMP_VM, 0, 0),
      // but kcmp is not compiled into our kernels and is disallowed by
      // seccomp.
      if (!client_config_.disable_vfork_detection &&
          syscall(__NR_gettid) != base::GetThreadId()) {
        postfork_return_value_ = true;
        vfork_detected = " (vfork detected)";
      } else {
        postfork_return_value_ = client_config_.disable_fork_teardown;
      }
      const char* action =
          postfork_return_value_ ? "Not shutting down" : "Shutting down";
      const char* force =
          postfork_return_value_ ? " (fork teardown disabled)" : "";
      PERFETTO_LOG(
          "Detected post-fork child situation. Not profiling the child. "
          "%s client%s%s",
          action, force, vfork_detected);
    }
    return true;
  }
  return false;
}

// The stack grows towards numerically smaller addresses, so the stack layout
// of main calling malloc is as follows.
//
//               +------------+
//               |SendWireMsg |
// stacktop +--> +------------+ 0x1000
//               |RecordMalloc|    +
//               +------------+    |
//               | malloc     |    |
//               +------------+    |
//               |  main      |    v
// stackbase +-> +------------+ 0xffff
bool Client::RecordMalloc(uint64_t sample_size,
                          uint64_t alloc_size,
                          uint64_t alloc_address) {
  if (PERFETTO_UNLIKELY(IsPostFork())) {
    return postfork_return_value_;
  }

  AllocMetadata metadata;
  const char* stackbase = GetStackBase();
  const char* stacktop = reinterpret_cast<char*>(__builtin_frame_address(0));
  unwindstack::AsmGetRegs(metadata.register_data);

  if (PERFETTO_UNLIKELY(stackbase < stacktop)) {
    PERFETTO_DFATAL_OR_ELOG("Stackbase < stacktop.");
    return false;
  }

  uint64_t stack_size = static_cast<uint64_t>(stackbase - stacktop);
  metadata.sample_size = sample_size;
  metadata.alloc_size = alloc_size;
  metadata.alloc_address = alloc_address;
  metadata.stack_pointer = reinterpret_cast<uint64_t>(stacktop);
  metadata.stack_pointer_offset = sizeof(AllocMetadata);
  metadata.arch = unwindstack::Regs::CurrentArch();
  metadata.sequence_number =
      1 + sequence_number_.fetch_add(1, std::memory_order_acq_rel);

  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
    metadata.clock_monotonic_coarse_timestamp =
        static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
  } else {
    metadata.clock_monotonic_coarse_timestamp = 0;
  }

  // The payload is the raw stack between stacktop and stackbase; the service
  // unwinds it out-of-process using the registers captured above.
  WireMessage msg{};
  msg.record_type = RecordType::Malloc;
  msg.alloc_header = &metadata;
  msg.payload = const_cast<char*>(stacktop);
  msg.payload_size = static_cast<size_t>(stack_size);

  if (!SendWireMessageWithRetriesIfBlocking(msg))
    return false;

  return SendControlSocketByte();
}

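// Writes |msg| into the shared ring buffer, sleeping kResendBackoffUs between
// attempts while in blocking mode and still connected. Worst-case blocking
// time (illustrative, matching the GetMaxTries example above): 250 tries *
// 100us = ~25ms before the client gives up and disconnects.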
bool Client::SendWireMessageWithRetriesIfBlocking(const WireMessage& msg) {
  for (uint64_t i = 0;
       max_shmem_tries_ == kInfiniteTries || i < max_shmem_tries_; ++i) {
    if (PERFETTO_LIKELY(SendWireMessage(&shmem_, msg)))
      return true;
    // Retry only if we are in blocking mode and still connected.
    if (client_config_.block_client && base::IsAgain(errno) && IsConnected()) {
      usleep(kResendBackoffUs);
    } else {
      break;
    }
  }
  PERFETTO_PLOG("Failed to write to shared ring buffer. Disconnecting.");
  return false;
}

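// Frees are buffered: entries accumulate in free_batch_ under
// free_batch_lock_ and are only flushed to the ring buffer once the batch
// holds kFreeBatchSize entries, amortizing the shared-memory write and the
// control-socket wakeup across many frees.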
bool Client::RecordFree(const uint64_t alloc_address) {
  if (PERFETTO_UNLIKELY(IsPostFork())) {
    return postfork_return_value_;
  }

  uint64_t sequence_number =
      1 + sequence_number_.fetch_add(1, std::memory_order_acq_rel);

  std::unique_lock<std::timed_mutex> l(free_batch_lock_, kLockTimeout);
  if (!l.owns_lock())
    return false;
  if (free_batch_.num_entries == kFreeBatchSize) {
    if (!FlushFreesLocked())
      return false;
    // Flushed the contents of the buffer, reset it for reuse.
    free_batch_.num_entries = 0;
  }
  FreeBatchEntry& current_entry =
      free_batch_.entries[free_batch_.num_entries++];
  current_entry.sequence_number = sequence_number;
  current_entry.addr = alloc_address;
  return true;
}

bool Client::FlushFreesLocked() {
  WireMessage msg = {};
  msg.record_type = RecordType::Free;
  msg.free_header = &free_batch_;
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
    free_batch_.clock_monotonic_coarse_timestamp =
        static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
  } else {
    free_batch_.clock_monotonic_coarse_timestamp = 0;
  }

  if (!SendWireMessageWithRetriesIfBlocking(msg))
    return false;
  return SendControlSocketByte();
}

bool Client::IsConnected() {
  PERFETTO_DCHECK(!sock_.IsBlocking());
  char buf[1];
  ssize_t recv_bytes = sock_.Receive(buf, sizeof(buf), nullptr, 0);
  if (recv_bytes == 0)
    return false;
  // This is not supposed to happen, because currently heapprofd does not send
  // data to the client. Handled here for generality's sake.
  if (recv_bytes > 0)
    return true;
  return base::IsAgain(errno);
}

bool Client::SendControlSocketByte() {
  // If base::IsAgain(errno), the socket buffer is full, so the service will
  // pick up the notification even without adding another byte.
  // In other error cases (usually EPIPE) we want to disconnect, because that
  // is how the service signals the tracing session was torn down.
  if (sock_.Send(kSingleByte, sizeof(kSingleByte)) == -1 &&
      !base::IsAgain(errno)) {
    PERFETTO_PLOG("Failed to send control socket byte.");
    return false;
  }
  return true;
}

}  // namespace profiling
}  // namespace perfetto
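
// Illustrative usage sketch (hypothetical caller; in the real system the
// client is created and driven by heapprofd's malloc hooks). The socket path
// and the |unhooked_allocator| below are assumptions for the example:
//
//   using perfetto::profiling::Client;
//   auto sock = Client::ConnectToHeapprofd("/dev/socket/heapprofd");
//   if (sock) {
//     std::shared_ptr<Client> client = Client::CreateAndHandshake(
//         std::move(sock.value()), unhooked_allocator);
//     if (client)
//       client->RecordMalloc(/*sample_size=*/4096, /*alloc_size=*/128,
//                            /*alloc_address=*/0x7000dead0000);
//   }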