1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/profiling/memory/client.h"
18
19 #include <inttypes.h>
20 #include <sys/prctl.h>
21 #include <sys/syscall.h>
22 #include <sys/types.h>
23 #include <unistd.h>
24 #include <unwindstack/MachineArm.h>
25 #include <unwindstack/MachineArm64.h>
26 #include <unwindstack/MachineMips.h>
27 #include <unwindstack/MachineMips64.h>
28 #include <unwindstack/MachineX86.h>
29 #include <unwindstack/MachineX86_64.h>
30 #include <unwindstack/Regs.h>
31 #include <unwindstack/RegsGetLocal.h>
32
33 #include <atomic>
34 #include <new>
35
36 #include "perfetto/base/logging.h"
37 #include "perfetto/base/scoped_file.h"
38 #include "perfetto/base/thread_utils.h"
39 #include "perfetto/base/time.h"
40 #include "perfetto/base/unix_socket.h"
41 #include "perfetto/base/utils.h"
42 #include "src/profiling/memory/sampler.h"
43 #include "src/profiling/memory/scoped_spinlock.h"
44 #include "src/profiling/memory/wire_protocol.h"
45
46 namespace perfetto {
47 namespace profiling {
48 namespace {
49
// One-byte payload written to the heapprofd socket: it accompanies the file
// descriptors during the handshake and is sent again as the control byte after
// each record written to the shared ring buffer.
const char kSingleByte[1] = {'x'};

// Upper bound on how long RecordFree waits to acquire the free-batch lock.
constexpr std::chrono::seconds kLockTimeout{1};

// Sleep between attempts (in microseconds, passed to usleep) when a write to
// the shared ring buffer has to be retried.
constexpr int kResendBackoffUs = 100;
53
IsMainThread()54 inline bool IsMainThread() {
55 return getpid() == base::GetThreadId();
56 }
57
58 // TODO(b/117203899): Remove this after making bionic implementation safe to
59 // use.
FindMainThreadStack()60 char* FindMainThreadStack() {
61 base::ScopedFstream maps(fopen("/proc/self/maps", "r"));
62 if (!maps) {
63 return nullptr;
64 }
65 while (!feof(*maps)) {
66 char line[1024];
67 char* data = fgets(line, sizeof(line), *maps);
68 if (data != nullptr && strstr(data, "[stack]")) {
69 char* sep = strstr(data, "-");
70 if (sep == nullptr)
71 continue;
72 sep++;
73 return reinterpret_cast<char*>(strtoll(sep, nullptr, 16));
74 }
75 }
76 return nullptr;
77 }
78
// Clears the process "dumpable" attribute. The int argument is ignored; the
// (int) -> int shape lets this be used as the release function of a
// base::ScopedResource<int, ...>. Always returns 0.
int UnsetDumpable(int /* unused */) {
  prctl(PR_SET_DUMPABLE, 0);
  return 0;
}
83
84 } // namespace
85
GetThreadStackBase()86 const char* GetThreadStackBase() {
87 pthread_attr_t attr;
88 if (pthread_getattr_np(pthread_self(), &attr) != 0)
89 return nullptr;
90 base::ScopedResource<pthread_attr_t*, pthread_attr_destroy, nullptr> cleanup(
91 &attr);
92
93 char* stackaddr;
94 size_t stacksize;
95 if (pthread_attr_getstack(&attr, reinterpret_cast<void**>(&stackaddr),
96 &stacksize) != 0)
97 return nullptr;
98 return stackaddr + stacksize;
99 }
100
101 // static
ConnectToHeapprofd(const std::string & sock_name)102 base::Optional<base::UnixSocketRaw> Client::ConnectToHeapprofd(
103 const std::string& sock_name) {
104 auto sock = base::UnixSocketRaw::CreateMayFail(base::SockType::kStream);
105 if (!sock || !sock.Connect(sock_name)) {
106 PERFETTO_PLOG("Failed to connect to %s", sock_name.c_str());
107 return base::nullopt;
108 }
109 if (!sock.SetTxTimeout(kClientSockTimeoutMs)) {
110 PERFETTO_PLOG("Failed to set send timeout for %s", sock_name.c_str());
111 return base::nullopt;
112 }
113 if (!sock.SetRxTimeout(kClientSockTimeoutMs)) {
114 PERFETTO_PLOG("Failed to set receive timeout for %s", sock_name.c_str());
115 return base::nullopt;
116 }
117 return std::move(sock);
118 }
119
120 // static
CreateAndHandshake(base::UnixSocketRaw sock,UnhookedAllocator<Client> unhooked_allocator)121 std::shared_ptr<Client> Client::CreateAndHandshake(
122 base::UnixSocketRaw sock,
123 UnhookedAllocator<Client> unhooked_allocator) {
124 if (!sock) {
125 PERFETTO_DFATAL("Socket not connected.");
126 return nullptr;
127 }
128
129 PERFETTO_DCHECK(sock.IsBlocking());
130
131 // We might be running in a process that is not dumpable (such as app
132 // processes on user builds), in which case the /proc/self/mem will be chown'd
133 // to root:root, and will not be accessible even to the process itself (see
134 // man 5 proc). In such situations, temporarily mark the process dumpable to
135 // be able to open the files, unsetting dumpability immediately afterwards.
136 int orig_dumpable = prctl(PR_GET_DUMPABLE);
137
138 enum { kNop, kDoUnset };
139 base::ScopedResource<int, UnsetDumpable, kNop, false> unset_dumpable(kNop);
140 if (orig_dumpable == 0) {
141 unset_dumpable.reset(kDoUnset);
142 prctl(PR_SET_DUMPABLE, 1);
143 }
144
145 base::ScopedFile maps(base::OpenFile("/proc/self/maps", O_RDONLY));
146 if (!maps) {
147 PERFETTO_DFATAL("Failed to open /proc/self/maps");
148 return nullptr;
149 }
150 base::ScopedFile mem(base::OpenFile("/proc/self/mem", O_RDONLY));
151 if (!mem) {
152 PERFETTO_DFATAL("Failed to open /proc/self/mem");
153 return nullptr;
154 }
155 // Restore original dumpability value if we overrode it.
156 unset_dumpable.reset();
157
158 int fds[kHandshakeSize];
159 fds[kHandshakeMaps] = *maps;
160 fds[kHandshakeMem] = *mem;
161
162 // Send an empty record to transfer fds for /proc/self/maps and
163 // /proc/self/mem.
164 if (sock.Send(kSingleByte, sizeof(kSingleByte), fds, kHandshakeSize) !=
165 sizeof(kSingleByte)) {
166 PERFETTO_DFATAL("Failed to send file descriptors.");
167 return nullptr;
168 }
169
170 ClientConfiguration client_config;
171 base::ScopedFile shmem_fd;
172 size_t recv = 0;
173 while (recv < sizeof(client_config)) {
174 size_t num_fds = 0;
175 base::ScopedFile* fd = nullptr;
176 if (!shmem_fd) {
177 num_fds = 1;
178 fd = &shmem_fd;
179 }
180 ssize_t rd = sock.Receive(reinterpret_cast<char*>(&client_config) + recv,
181 sizeof(client_config) - recv, fd, num_fds);
182 if (rd == -1) {
183 PERFETTO_PLOG("Failed to receive ClientConfiguration.");
184 return nullptr;
185 }
186 if (rd == 0) {
187 PERFETTO_LOG("Server disconnected while sending ClientConfiguration.");
188 return nullptr;
189 }
190 recv += static_cast<size_t>(rd);
191 }
192
193 if (!shmem_fd) {
194 PERFETTO_DFATAL("Did not receive shmem fd.");
195 return nullptr;
196 }
197
198 auto shmem = SharedRingBuffer::Attach(std::move(shmem_fd));
199 if (!shmem || !shmem->is_valid()) {
200 PERFETTO_DFATAL("Failed to attach to shmem.");
201 return nullptr;
202 }
203
204 PERFETTO_DCHECK(client_config.interval >= 1);
205 // TODO(fmayer): Always make this nonblocking.
206 // This is so that without block_client, we get the old behaviour that rate
207 // limits using the blocking socket. We do not want to change that for Q.
208 sock.SetBlocking(!client_config.block_client);
209 Sampler sampler{client_config.interval};
210 // note: the shared_ptr will retain a copy of the unhooked_allocator
211 return std::allocate_shared<Client>(unhooked_allocator, std::move(sock),
212 client_config, std::move(shmem.value()),
213 std::move(sampler), getpid(),
214 FindMainThreadStack());
215 }
216
Client(base::UnixSocketRaw sock,ClientConfiguration client_config,SharedRingBuffer shmem,Sampler sampler,pid_t pid_at_creation,const char * main_thread_stack_base)217 Client::Client(base::UnixSocketRaw sock,
218 ClientConfiguration client_config,
219 SharedRingBuffer shmem,
220 Sampler sampler,
221 pid_t pid_at_creation,
222 const char* main_thread_stack_base)
223 : client_config_(client_config),
224 sampler_(std::move(sampler)),
225 sock_(std::move(sock)),
226 main_thread_stack_base_(main_thread_stack_base),
227 shmem_(std::move(shmem)),
228 pid_at_creation_(pid_at_creation) {}
229
GetStackBase()230 const char* Client::GetStackBase() {
231 if (IsMainThread()) {
232 if (!main_thread_stack_base_)
233 // Because pthread_attr_getstack reads and parses /proc/self/maps and
234 // /proc/self/stat, we have to cache the result here.
235 main_thread_stack_base_ = GetThreadStackBase();
236 return main_thread_stack_base_;
237 }
238 return GetThreadStackBase();
239 }
240
241 // The stack grows towards numerically smaller addresses, so the stack layout
242 // of main calling malloc is as follows.
243 //
244 // +------------+
245 // |SendWireMsg |
246 // stacktop +--> +------------+ 0x1000
247 // |RecordMalloc| +
248 // +------------+ |
249 // | malloc | |
250 // +------------+ |
251 // | main | v
252 // stackbase +-> +------------+ 0xffff
RecordMalloc(uint64_t alloc_size,uint64_t total_size,uint64_t alloc_address)253 bool Client::RecordMalloc(uint64_t alloc_size,
254 uint64_t total_size,
255 uint64_t alloc_address) {
256 if (PERFETTO_UNLIKELY(getpid() != pid_at_creation_)) {
257 PERFETTO_LOG("Detected post-fork child situation, stopping profiling.");
258 return false;
259 }
260
261 AllocMetadata metadata;
262 const char* stackbase = GetStackBase();
263 const char* stacktop = reinterpret_cast<char*>(__builtin_frame_address(0));
264 unwindstack::AsmGetRegs(metadata.register_data);
265
266 if (stackbase < stacktop) {
267 PERFETTO_DFATAL("Stackbase >= stacktop.");
268 return false;
269 }
270
271 uint64_t stack_size = static_cast<uint64_t>(stackbase - stacktop);
272 metadata.total_size = total_size;
273 metadata.alloc_size = alloc_size;
274 metadata.alloc_address = alloc_address;
275 metadata.stack_pointer = reinterpret_cast<uint64_t>(stacktop);
276 metadata.stack_pointer_offset = sizeof(AllocMetadata);
277 metadata.arch = unwindstack::Regs::CurrentArch();
278 metadata.sequence_number =
279 1 + sequence_number_.fetch_add(1, std::memory_order_acq_rel);
280
281 struct timespec ts;
282 if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
283 metadata.clock_monotonic_coarse_timestamp =
284 static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
285 } else {
286 metadata.clock_monotonic_coarse_timestamp = 0;
287 }
288
289 WireMessage msg{};
290 msg.record_type = RecordType::Malloc;
291 msg.alloc_header = &metadata;
292 msg.payload = const_cast<char*>(stacktop);
293 msg.payload_size = static_cast<size_t>(stack_size);
294
295 if (!SendWireMessageWithRetriesIfBlocking(msg))
296 return false;
297
298 return SendControlSocketByte();
299 }
300
SendWireMessageWithRetriesIfBlocking(const WireMessage & msg)301 bool Client::SendWireMessageWithRetriesIfBlocking(const WireMessage& msg) {
302 for (;;) {
303 if (PERFETTO_LIKELY(SendWireMessage(&shmem_, msg)))
304 return true;
305 // retry if in blocking mode and still connected
306 if (client_config_.block_client && base::IsAgain(errno) && IsConnected()) {
307 usleep(kResendBackoffUs);
308 continue;
309 }
310 PERFETTO_PLOG("Failed to write to shared ring buffer. Disconnecting.");
311 return false;
312 }
313 }
314
RecordFree(const uint64_t alloc_address)315 bool Client::RecordFree(const uint64_t alloc_address) {
316 uint64_t sequence_number =
317 1 + sequence_number_.fetch_add(1, std::memory_order_acq_rel);
318
319 std::unique_lock<std::timed_mutex> l(free_batch_lock_, kLockTimeout);
320 if (!l.owns_lock())
321 return false;
322 if (free_batch_.num_entries == kFreeBatchSize) {
323 if (!FlushFreesLocked())
324 return false;
325 // Flushed the contents of the buffer, reset it for reuse.
326 free_batch_.num_entries = 0;
327 }
328 FreeBatchEntry& current_entry =
329 free_batch_.entries[free_batch_.num_entries++];
330 current_entry.sequence_number = sequence_number;
331 current_entry.addr = alloc_address;
332 return true;
333 }
334
FlushFreesLocked()335 bool Client::FlushFreesLocked() {
336 if (PERFETTO_UNLIKELY(getpid() != pid_at_creation_)) {
337 PERFETTO_LOG("Detected post-fork child situation, stopping profiling.");
338 return false;
339 }
340
341 WireMessage msg = {};
342 msg.record_type = RecordType::Free;
343 msg.free_header = &free_batch_;
344 struct timespec ts;
345 if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
346 free_batch_.clock_monotonic_coarse_timestamp =
347 static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
348 } else {
349 free_batch_.clock_monotonic_coarse_timestamp = 0;
350 }
351
352 if (!SendWireMessageWithRetriesIfBlocking(msg))
353 return false;
354 return SendControlSocketByte();
355 }
356
IsConnected()357 bool Client::IsConnected() {
358 PERFETTO_DCHECK(!sock_.IsBlocking());
359 char buf[1];
360 ssize_t recv_bytes = sock_.Receive(buf, sizeof(buf), nullptr, 0);
361 if (recv_bytes == 0)
362 return false;
363 // This is not supposed to happen because currently heapprofd does not send
364 // data to the client. Here for generality's sake.
365 if (recv_bytes > 0)
366 return true;
367 return base::IsAgain(errno);
368 }
369
SendControlSocketByte()370 bool Client::SendControlSocketByte() {
371 // TODO(fmayer): Fix the special casing that only block_client uses a
372 // nonblocking socket.
373 if (sock_.Send(kSingleByte, sizeof(kSingleByte)) == -1 &&
374 (!client_config_.block_client || !base::IsAgain(errno))) {
375 PERFETTO_PLOG("Failed to send control socket byte.");
376 return false;
377 }
378 return true;
379 }
380
381 } // namespace profiling
382 } // namespace perfetto
383