/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/profiling/memory/client.h"

#include <signal.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <new>

#include <unwindstack/MachineArm.h>
#include <unwindstack/MachineArm64.h>
#include <unwindstack/MachineMips.h>
#include <unwindstack/MachineMips64.h>
#include <unwindstack/MachineX86.h>
#include <unwindstack/MachineX86_64.h>
#include <unwindstack/Regs.h>
#include <unwindstack/RegsGetLocal.h>

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/base/unix_socket.h"
#include "perfetto/ext/base/utils.h"
#include "src/profiling/memory/sampler.h"
#include "src/profiling/memory/scoped_spinlock.h"
#include "src/profiling/memory/shared_ring_buffer.h"
#include "src/profiling/memory/wire_protocol.h"

namespace perfetto {
namespace profiling {
namespace {

const char kSingleByte[1] = {'x'};
constexpr auto kResendBackoffUs = 100;

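// On Linux, the TID of the main thread is equal to the PID of the process,
// so comparing base::GetThreadId() against getpid() detects the main thread
// without allocating.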
inline bool IsMainThread() {
  return getpid() == base::GetThreadId();
}

int UnsetDumpable(int) {
  prctl(PR_SET_DUMPABLE, 0);
  return 0;
}

bool Contained(const StackRange& base, const char* ptr) {
  return (ptr >= base.begin && ptr < base.end);
}

}  // namespace

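// Computes how many times a write into the shared ring buffer is attempted
// before giving up. Non-blocking clients get a single attempt; a blocking
// client with block_client_timeout_us == 0 retries forever. For example,
// block_client_timeout_us = 500000 with the 100us backoff above yields up to
// 5000 attempts.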
uint64_t GetMaxTries(const ClientConfiguration& client_config) {
  if (!client_config.block_client)
    return 1u;
  if (client_config.block_client_timeout_us == 0)
    return kInfiniteTries;
  return std::max<uint64_t>(
      1ul, client_config.block_client_timeout_us / kResendBackoffUs);
}

StackRange GetThreadStackRange() {
  pthread_attr_t attr;
  if (pthread_getattr_np(pthread_self(), &attr) != 0)
    return {nullptr, nullptr};
  base::ScopedResource<pthread_attr_t*, pthread_attr_destroy, nullptr> cleanup(
      &attr);

  char* stackaddr;
  size_t stacksize;
  if (pthread_attr_getstack(&attr, reinterpret_cast<void**>(&stackaddr),
                            &stacksize) != 0)
    return {nullptr, nullptr};
  return {stackaddr, stackaddr + stacksize};
}

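// Returns the bounds of the signal alternate stack, but only while we are
// currently executing on it (SS_ONSTACK is set); otherwise an empty range is
// returned, since the sigaltstack is irrelevant for the current stack
// pointer.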
StackRange GetSigAltStackRange() {
  stack_t altstack;

  if (sigaltstack(nullptr, &altstack) == -1) {
    PERFETTO_PLOG("sigaltstack");
    return {nullptr, nullptr};
  }

  if ((altstack.ss_flags & SS_ONSTACK) == 0) {
    return {nullptr, nullptr};
  }

  return {static_cast<char*>(altstack.ss_sp),
          static_cast<char*>(altstack.ss_sp) + altstack.ss_size};
}

// The implementation of pthread_getattr_np for the main thread uses malloc,
// so we cannot use it in GetStackEnd, which we call inside of RecordMalloc
// (which itself is called from malloc). We would re-enter malloc if we did.
//
// This is why we find the stack range for the main thread when constructing
// the client and remember it.
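// An illustrative (not verbatim) /proc/self/maps entry that this parser
// matches:
//   7ffd4f0e1000-7ffd4f102000 rw-p 00000000 00:00 0    [stack]
// The two hex addresses around the dash are the bounds we extract.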
StackRange GetMainThreadStackRange() {
  base::ScopedFstream maps(fopen("/proc/self/maps", "re"));
  if (!maps) {
    return {nullptr, nullptr};
  }
  while (!feof(*maps)) {
    char line[1024];
    char* data = fgets(line, sizeof(line), *maps);
    if (data != nullptr && strstr(data, "[stack]")) {
      char* sep = strstr(data, "-");
      if (sep == nullptr)
        continue;

      char* min = reinterpret_cast<char*>(strtoll(data, nullptr, 16));
      char* max = reinterpret_cast<char*>(strtoll(sep + 1, nullptr, 16));
      return {min, max};
    }
  }
  return {nullptr, nullptr};
}

// static
base::Optional<base::UnixSocketRaw> Client::ConnectToHeapprofd(
    const std::string& sock_name) {
  auto sock = base::UnixSocketRaw::CreateMayFail(base::SockFamily::kUnix,
                                                 base::SockType::kStream);
  if (!sock || !sock.Connect(sock_name)) {
    PERFETTO_PLOG("Failed to connect to %s", sock_name.c_str());
    return base::nullopt;
  }
  if (!sock.SetTxTimeout(kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set send timeout for %s", sock_name.c_str());
    return base::nullopt;
  }
  if (!sock.SetRxTimeout(kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set receive timeout for %s", sock_name.c_str());
    return base::nullopt;
  }
  return std::move(sock);
}

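// Handshake overview (as implemented below): the client sends a single byte
// carrying the /proc/self/maps and /proc/self/mem file descriptors as
// ancillary data; the service replies with a ClientConfiguration struct plus
// the file descriptor of the shared ring buffer, which the client then
// attaches to.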
// static
std::shared_ptr<Client> Client::CreateAndHandshake(
    base::UnixSocketRaw sock,
    UnhookedAllocator<Client> unhooked_allocator) {
  if (!sock) {
    PERFETTO_DFATAL_OR_ELOG("Socket not connected.");
    return nullptr;
  }

  sock.DcheckIsBlocking(true);

  // We might be running in a process that is not dumpable (such as app
  // processes on user builds), in which case /proc/self/mem will be chown'd
  // to root:root and will not be accessible even to the process itself (see
  // man 5 proc). In such situations, temporarily mark the process dumpable to
  // be able to open the files, unsetting dumpability immediately afterwards.
  int orig_dumpable = prctl(PR_GET_DUMPABLE);

  enum { kNop, kDoUnset };
  base::ScopedResource<int, UnsetDumpable, kNop, false> unset_dumpable(kNop);
  if (orig_dumpable == 0) {
    unset_dumpable.reset(kDoUnset);
    prctl(PR_SET_DUMPABLE, 1);
  }

  base::ScopedFile maps(base::OpenFile("/proc/self/maps", O_RDONLY));
  if (!maps) {
    PERFETTO_DFATAL_OR_ELOG("Failed to open /proc/self/maps");
    return nullptr;
  }
  base::ScopedFile mem(base::OpenFile("/proc/self/mem", O_RDONLY));
  if (!mem) {
    PERFETTO_DFATAL_OR_ELOG("Failed to open /proc/self/mem");
    return nullptr;
  }

  // Restore the original dumpability value if we overrode it.
  unset_dumpable.reset();

  int fds[kHandshakeSize];
  fds[kHandshakeMaps] = *maps;
  fds[kHandshakeMem] = *mem;

  // Send an empty record to transfer the fds for /proc/self/maps and
  // /proc/self/mem.
  if (sock.Send(kSingleByte, sizeof(kSingleByte), fds, kHandshakeSize) !=
      sizeof(kSingleByte)) {
    PERFETTO_DFATAL_OR_ELOG("Failed to send file descriptors.");
    return nullptr;
  }

  ClientConfiguration client_config;
  base::ScopedFile shmem_fd;
  size_t recv = 0;
  while (recv < sizeof(client_config)) {
    size_t num_fds = 0;
    base::ScopedFile* fd = nullptr;
    if (!shmem_fd) {
      num_fds = 1;
      fd = &shmem_fd;
    }
    ssize_t rd = sock.Receive(reinterpret_cast<char*>(&client_config) + recv,
                              sizeof(client_config) - recv, fd, num_fds);
    if (rd == -1) {
      PERFETTO_PLOG("Failed to receive ClientConfiguration.");
      return nullptr;
    }
    if (rd == 0) {
      PERFETTO_LOG("Server disconnected while sending ClientConfiguration.");
      return nullptr;
    }
    recv += static_cast<size_t>(rd);
  }

  if (!shmem_fd) {
    PERFETTO_DFATAL_OR_ELOG("Did not receive shmem fd.");
    return nullptr;
  }

  auto shmem = SharedRingBuffer::Attach(std::move(shmem_fd));
  if (!shmem || !shmem->is_valid()) {
    PERFETTO_DFATAL_OR_ELOG("Failed to attach to shmem.");
    return nullptr;
  }

  sock.SetBlocking(false);
  // Note: the shared_ptr will retain a copy of the unhooked_allocator.
  return std::allocate_shared<Client>(unhooked_allocator, std::move(sock),
                                      client_config, std::move(shmem.value()),
                                      getpid(), GetMainThreadStackRange());
}

Client::Client(base::UnixSocketRaw sock,
               ClientConfiguration client_config,
               SharedRingBuffer shmem,
               pid_t pid_at_creation,
               StackRange main_thread_stack_range)
    : client_config_(client_config),
      max_shmem_tries_(GetMaxTries(client_config_)),
      sock_(std::move(sock)),
      main_thread_stack_range_(main_thread_stack_range),
      shmem_(std::move(shmem)),
      pid_at_creation_(pid_at_creation) {}

Client::~Client() {
  // This is a workaround for code like the following:
  // https://android.googlesource.com/platform/libcore/+/4ecb71f94378716f88703b9f7548b5d24839262f/ojluni/src/main/native/UNIXProcess_md.c#427
  // It forks, then closes all fds by iterating over /proc/self/fd using
  // opendir. Unfortunately closedir calls free, which detects the fork and
  // then tries to destruct this Client.
  //
  // ScopedResource crashes on failure to close, so we explicitly ignore
  // failures here.
  int fd = sock_.ReleaseFd().release();
  if (fd != -1)
    close(fd);
}

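// Returns the end (one past the highest valid address) of whichever stack
// contains stackptr: the current thread's stack, the signal alternate stack,
// or, for the main thread, the range cached at construction time. Returns
// nullptr if stackptr cannot be attributed to any known stack.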
const char* Client::GetStackEnd(const char* stackptr) {
  StackRange thread_stack_range;
  bool is_main_thread = IsMainThread();
  if (is_main_thread) {
    thread_stack_range = main_thread_stack_range_;
  } else {
    thread_stack_range = GetThreadStackRange();
  }
  if (Contained(thread_stack_range, stackptr)) {
    return thread_stack_range.end;
  }
  StackRange sigalt_stack_range = GetSigAltStackRange();
  if (Contained(sigalt_stack_range, stackptr)) {
    return sigalt_stack_range.end;
  }
  // The main thread's stack might have grown since we read its bounds. We now
  // know the pointer is not on the sigaltstack, so it has to be on the main
  // stack.
  // TODO(fmayer): We should reparse maps here, because otherwise we will keep
  // hitting the slow-path that calls the sigaltstack syscall.
  if (is_main_thread && stackptr < thread_stack_range.end) {
    return thread_stack_range.end;
  }
  return nullptr;
}

// Best-effort detection of whether we're continuing work in a forked child of
// the profiled process, in which case we want to stop. Note that due to
// malloc_hooks.cc's atfork handler, proper fork calls should leak the child
// before reaching this point. Therefore this logic exists primarily to handle
// clone and vfork.
// TODO(rsavitski): rename/delete |disable_fork_teardown| config option if this
// logic sticks, as the option becomes more clone-specific, and quite narrow.
bool Client::IsPostFork() {
  if (PERFETTO_UNLIKELY(getpid() != pid_at_creation_)) {
    // Only print the message once, even if we do not shut down the client.
    if (!detected_fork_) {
      detected_fork_ = true;
      const char* vfork_detected = "";

      // We use the fact that vfork does not update Bionic's TID cache, so
      // we will have a mismatch between the actual TID (from the syscall)
      // and the cached one.
      //
      // What we really want to check is whether we are sharing the virtual
      // memory space with the original process. That would be
      //   syscall(__NR_kcmp, syscall(__NR_getpid), pid_at_creation_,
      //           KCMP_VM, 0, 0),
      // but kcmp is not compiled into our kernels and is disallowed by
      // seccomp.
      if (!client_config_.disable_vfork_detection &&
          syscall(__NR_gettid) != base::GetThreadId()) {
        postfork_return_value_ = true;
        vfork_detected = " (vfork detected)";
      } else {
        postfork_return_value_ = client_config_.disable_fork_teardown;
      }
      const char* action =
          postfork_return_value_ ? "Not shutting down" : "Shutting down";
      const char* force =
          postfork_return_value_ ? " (fork teardown disabled)" : "";
      PERFETTO_LOG(
          "Detected post-fork child situation. Not profiling the child. "
          "%s client%s%s",
          action, force, vfork_detected);
    }
    return true;
  }
  return false;
}

// The stack grows towards numerically smaller addresses, so the stack layout
// of main calling malloc is as follows.
//
//               +------------+
//               |SendWireMsg |
// stackptr +--> +------------+ 0x1000
//               |RecordMalloc|    +
//               +------------+    |
//               |   malloc   |    |
//               +------------+    |
//               |    main    |    v
// stackend +--> +------------+ 0xffff
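//
// The raw stack [stackptr, stackend) is shipped as the message payload,
// together with a register snapshot in the metadata header, so the service
// can unwind the callstack out-of-process using the /proc/self/{maps,mem}
// descriptors it received during the handshake.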
bool Client::RecordMalloc(uint32_t heap_id,
                          uint64_t sample_size,
                          uint64_t alloc_size,
                          uint64_t alloc_address) {
  if (PERFETTO_UNLIKELY(IsPostFork())) {
    return postfork_return_value_;
  }

  AllocMetadata metadata;
  const char* stackptr = reinterpret_cast<char*>(__builtin_frame_address(0));
  unwindstack::AsmGetRegs(metadata.register_data);
  const char* stackend = GetStackEnd(stackptr);
  if (!stackend) {
    PERFETTO_ELOG("Failed to find stackend.");
    shmem_.SetErrorState(SharedRingBuffer::kInvalidStackBounds);
    return false;
  }
  uint64_t stack_size = static_cast<uint64_t>(stackend - stackptr);
  metadata.sample_size = sample_size;
  metadata.alloc_size = alloc_size;
  metadata.alloc_address = alloc_address;
  metadata.stack_pointer = reinterpret_cast<uint64_t>(stackptr);
  metadata.arch = unwindstack::Regs::CurrentArch();
  metadata.sequence_number =
      1 + sequence_number_[heap_id].fetch_add(1, std::memory_order_acq_rel);
  metadata.heap_id = heap_id;

  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
    metadata.clock_monotonic_coarse_timestamp =
        static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
  } else {
    metadata.clock_monotonic_coarse_timestamp = 0;
  }

  WireMessage msg{};
  msg.record_type = RecordType::Malloc;
  msg.alloc_header = &metadata;
  msg.payload = const_cast<char*>(stackptr);
  msg.payload_size = static_cast<size_t>(stack_size);

  if (SendWireMessageWithRetriesIfBlocking(msg) == -1)
    return false;

  if (!shmem_.GetAndResetReaderPaused())
    return true;
  return SendControlSocketByte();
}

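// Attempts to write |msg| into the shared ring buffer, sleeping for
// kResendBackoffUs between attempts while in blocking mode and still
// connected, up to max_shmem_tries_ attempts in total. Returns the value
// propagated from SendWireMessage (used by RecordFree as an estimate of the
// remaining space in the buffer), or -1 on failure.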
int64_t Client::SendWireMessageWithRetriesIfBlocking(const WireMessage& msg) {
  for (uint64_t i = 0;
       max_shmem_tries_ == kInfiniteTries || i < max_shmem_tries_; ++i) {
    if (shmem_.shutting_down())
      return -1;
    int64_t res = SendWireMessage(&shmem_, msg);
    if (PERFETTO_LIKELY(res >= 0))
      return res;
    // Retry if we are in blocking mode and still connected.
    if (client_config_.block_client && base::IsAgain(errno) && IsConnected()) {
      usleep(kResendBackoffUs);
    } else {
      break;
    }
  }
  if (IsConnected())
    shmem_.SetErrorState(SharedRingBuffer::kHitTimeout);
  PERFETTO_PLOG("Failed to write to shared ring buffer. Disconnecting.");
  return -1;
}

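// Records a free. The per-heap sequence number is drawn from the same atomic
// counter as allocations, so the service can totally order the malloc and
// free events of a heap.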
bool Client::RecordFree(uint32_t heap_id, const uint64_t alloc_address) {
  if (PERFETTO_UNLIKELY(IsPostFork())) {
    return postfork_return_value_;
  }

  FreeEntry current_entry;
  current_entry.sequence_number =
      1 + sequence_number_[heap_id].fetch_add(1, std::memory_order_acq_rel);
  current_entry.addr = alloc_address;
  current_entry.heap_id = heap_id;
  WireMessage msg = {};
  msg.record_type = RecordType::Free;
  msg.free_header = &current_entry;
  // Do not send the control socket byte, as frees are very cheap to handle,
  // so we just delay to the next alloc. Sending the control socket byte is
  // ~10x the rest of the client overhead.
  int64_t bytes_free = SendWireMessageWithRetriesIfBlocking(msg);
  if (bytes_free == -1)
    return false;
  // It looks like we are filling up the shmem with frees. Flush.
  if (static_cast<uint64_t>(bytes_free) < shmem_.size() / 2 &&
      shmem_.GetAndResetReaderPaused()) {
    return SendControlSocketByte();
  }
  return true;
}

bool Client::RecordHeapInfo(uint32_t heap_id,
                            const char* heap_name,
                            uint64_t interval) {
  if (PERFETTO_UNLIKELY(IsPostFork())) {
    return postfork_return_value_;
  }

  HeapName hnr;
  hnr.heap_id = heap_id;
  base::StringCopy(&hnr.heap_name[0], heap_name, sizeof(hnr.heap_name));
  hnr.sample_interval = interval;

  WireMessage msg = {};
  msg.record_type = RecordType::HeapName;
  msg.heap_name_header = &hnr;
  // SendWireMessageWithRetriesIfBlocking returns -1 on failure; avoid the
  // implicit int64_t -> bool conversion, which would map -1 to true.
  return SendWireMessageWithRetriesIfBlocking(msg) != -1;
}

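// Probes the connection with a non-blocking one-byte read: 0 bytes means the
// service closed the socket, EAGAIN means the connection is still alive, and
// any other error counts as disconnected.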
bool Client::IsConnected() {
  sock_.DcheckIsBlocking(false);
  char buf[1];
  ssize_t recv_bytes = sock_.Receive(buf, sizeof(buf), nullptr, 0);
  if (recv_bytes == 0)
    return false;
  // This is not supposed to happen because currently heapprofd does not send
  // data to the client. Here for generality's sake.
  if (recv_bytes > 0)
    return true;
  return base::IsAgain(errno);
}

bool Client::SendControlSocketByte() {
  // If base::IsAgain(errno), the socket buffer is full, so the service will
  // pick up the notification even without adding another byte.
  // In other error cases (usually EPIPE) we want to disconnect, because that
  // is how the service signals the tracing session was torn down.
  if (sock_.Send(kSingleByte, sizeof(kSingleByte)) == -1 &&
      !base::IsAgain(errno)) {
    if (shmem_.shutting_down()) {
      PERFETTO_LOG("Profiling session ended.");
    } else {
      PERFETTO_PLOG("Failed to send control socket byte.");
    }
    return false;
  }
  return true;
}

}  // namespace profiling
}  // namespace perfetto