1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/profiling/memory/unwinding.h"
18 
19 #include <sys/types.h>
20 #include <unistd.h>
21 
22 #include <unwindstack/MachineArm.h>
23 #include <unwindstack/MachineArm64.h>
24 #include <unwindstack/MachineMips.h>
25 #include <unwindstack/MachineMips64.h>
26 #include <unwindstack/MachineX86.h>
27 #include <unwindstack/MachineX86_64.h>
28 #include <unwindstack/Maps.h>
29 #include <unwindstack/Memory.h>
30 #include <unwindstack/Regs.h>
31 #include <unwindstack/RegsArm.h>
32 #include <unwindstack/RegsArm64.h>
33 #include <unwindstack/RegsMips.h>
34 #include <unwindstack/RegsMips64.h>
35 #include <unwindstack/RegsX86.h>
36 #include <unwindstack/RegsX86_64.h>
37 #include <unwindstack/Unwinder.h>
38 #include <unwindstack/UserArm.h>
39 #include <unwindstack/UserArm64.h>
40 #include <unwindstack/UserMips.h>
41 #include <unwindstack/UserMips64.h>
42 #include <unwindstack/UserX86.h>
43 #include <unwindstack/UserX86_64.h>
44 
45 #include <procinfo/process_map.h>
46 
47 #include "perfetto/base/logging.h"
48 #include "perfetto/base/task_runner.h"
49 #include "perfetto/ext/base/file_utils.h"
50 #include "perfetto/ext/base/scoped_file.h"
51 #include "perfetto/ext/base/string_utils.h"
52 #include "perfetto/ext/base/thread_task_runner.h"
53 
54 #include "src/profiling/memory/unwound_messages.h"
55 #include "src/profiling/memory/wire_protocol.h"
56 
57 namespace perfetto {
58 namespace profiling {
59 namespace {
60 
61 constexpr base::TimeMillis kMapsReparseInterval{500};
62 constexpr uint32_t kRetryDelayMs = 100;
63 
64 constexpr size_t kMaxFrames = 500;
65 
66 // We assume average ~300us per unwind. If we handle up to 1000 unwinds, this
67 // makes sure other tasks get to be run at least every 300ms if the unwinding
68 // saturates this thread.
69 constexpr size_t kUnwindBatchSize = 1000;
70 constexpr size_t kRecordBatchSize = 1024;
71 constexpr size_t kMaxAllocRecordArenaSize = 2 * kRecordBatchSize;
72 
#pragma GCC diagnostic push
// Deterministic construction / destruction order is irrelevant for this
// global, so silence the corresponding warnings.
#pragma GCC diagnostic ignored "-Wglobal-constructors"
#pragma GCC diagnostic ignored "-Wexit-time-destructors"
// Map names passed to Unwinder::Unwind() as the initial maps to skip.
static std::vector<std::string> kSkipMaps = {
    "heapprofd_client.so",
    "heapprofd_client_api.so",
};
#pragma GCC diagnostic pop
80 
GetRegsSize(unwindstack::Regs * regs)81 size_t GetRegsSize(unwindstack::Regs* regs) {
82   if (regs->Is32Bit())
83     return sizeof(uint32_t) * regs->total_regs();
84   return sizeof(uint64_t) * regs->total_regs();
85 }
86 
ReadFromRawData(unwindstack::Regs * regs,void * raw_data)87 void ReadFromRawData(unwindstack::Regs* regs, void* raw_data) {
88   memcpy(regs->RawData(), raw_data, GetRegsSize(regs));
89 }
90 
91 }  // namespace
92 
CreateRegsFromRawData(unwindstack::ArchEnum arch,void * raw_data)93 std::unique_ptr<unwindstack::Regs> CreateRegsFromRawData(
94     unwindstack::ArchEnum arch,
95     void* raw_data) {
96   std::unique_ptr<unwindstack::Regs> ret;
97   switch (arch) {
98     case unwindstack::ARCH_X86:
99       ret.reset(new unwindstack::RegsX86());
100       break;
101     case unwindstack::ARCH_X86_64:
102       ret.reset(new unwindstack::RegsX86_64());
103       break;
104     case unwindstack::ARCH_ARM:
105       ret.reset(new unwindstack::RegsArm());
106       break;
107     case unwindstack::ARCH_ARM64:
108       ret.reset(new unwindstack::RegsArm64());
109       break;
110     case unwindstack::ARCH_MIPS:
111       ret.reset(new unwindstack::RegsMips());
112       break;
113     case unwindstack::ARCH_MIPS64:
114       ret.reset(new unwindstack::RegsMips64());
115       break;
116     case unwindstack::ARCH_UNKNOWN:
117       break;
118   }
119   if (ret)
120     ReadFromRawData(ret.get(), raw_data);
121   return ret;
122 }
123 
DoUnwind(WireMessage * msg,UnwindingMetadata * metadata,AllocRecord * out)124 bool DoUnwind(WireMessage* msg, UnwindingMetadata* metadata, AllocRecord* out) {
125   AllocMetadata* alloc_metadata = msg->alloc_header;
126   std::unique_ptr<unwindstack::Regs> regs(CreateRegsFromRawData(
127       alloc_metadata->arch, alloc_metadata->register_data));
128   if (regs == nullptr) {
129     PERFETTO_DLOG("Unable to construct unwindstack::Regs");
130     unwindstack::FrameData frame_data{};
131     frame_data.function_name = "ERROR READING REGISTERS";
132 
133     out->frames.clear();
134     out->build_ids.clear();
135     out->frames.emplace_back(std::move(frame_data));
136     out->build_ids.emplace_back("");
137     out->error = true;
138     return false;
139   }
140   uint8_t* stack = reinterpret_cast<uint8_t*>(msg->payload);
141   std::shared_ptr<unwindstack::Memory> mems =
142       std::make_shared<StackOverlayMemory>(metadata->fd_mem,
143                                            alloc_metadata->stack_pointer, stack,
144                                            msg->payload_size);
145 
146   unwindstack::Unwinder unwinder(kMaxFrames, &metadata->fd_maps, regs.get(),
147                                  mems);
148 #if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
149   unwinder.SetJitDebug(metadata->GetJitDebug(regs->Arch()));
150   unwinder.SetDexFiles(metadata->GetDexFiles(regs->Arch()));
151 #endif
152   // Suppress incorrect "variable may be uninitialized" error for if condition
153   // after this loop. error_code = LastErrorCode gets run at least once.
154   unwindstack::ErrorCode error_code = unwindstack::ERROR_NONE;
155   for (int attempt = 0; attempt < 2; ++attempt) {
156     if (attempt > 0) {
157       if (metadata->last_maps_reparse_time + kMapsReparseInterval >
158           base::GetWallTimeMs()) {
159         PERFETTO_DLOG("Skipping reparse due to rate limit.");
160         break;
161       }
162       PERFETTO_DLOG("Reparsing maps");
163       metadata->ReparseMaps();
164       metadata->last_maps_reparse_time = base::GetWallTimeMs();
165       // Regs got invalidated by libuwindstack's speculative jump.
166       // Reset.
167       ReadFromRawData(regs.get(), alloc_metadata->register_data);
168       out->reparsed_map = true;
169 #if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
170       unwinder.SetJitDebug(metadata->GetJitDebug(regs->Arch()));
171       unwinder.SetDexFiles(metadata->GetDexFiles(regs->Arch()));
172 #endif
173     }
174     out->frames.swap(unwinder.frames());  // Provide the unwinder buffer to use.
175     unwinder.Unwind(&kSkipMaps, /*map_suffixes_to_ignore=*/nullptr);
176     out->frames.swap(unwinder.frames());  // Take the buffer back.
177     error_code = unwinder.LastErrorCode();
178     if (error_code != unwindstack::ERROR_INVALID_MAP &&
179         (unwinder.warnings() & unwindstack::WARNING_DEX_PC_NOT_IN_MAP) == 0) {
180       break;
181     }
182   }
183   out->build_ids.resize(out->frames.size());
184   for (size_t i = 0; i < out->frames.size(); ++i) {
185     out->build_ids[i] = metadata->GetBuildId(out->frames[i]);
186   }
187 
188   if (error_code != unwindstack::ERROR_NONE) {
189     PERFETTO_DLOG("Unwinding error %" PRIu8, error_code);
190     unwindstack::FrameData frame_data{};
191     frame_data.function_name =
192         "ERROR " + StringifyLibUnwindstackError(error_code);
193 
194     out->frames.emplace_back(std::move(frame_data));
195     out->build_ids.emplace_back("");
196     out->error = true;
197   }
198   return true;
199 }
200 
~UnwindingWorker()201 UnwindingWorker::~UnwindingWorker() {
202   if (thread_task_runner_.get() == nullptr) {
203     return;
204   }
205   std::mutex mutex;
206   std::condition_variable cv;
207 
208   std::unique_lock<std::mutex> lock(mutex);
209   bool done = false;
210   thread_task_runner_.PostTask([&mutex, &cv, &done, this] {
211     for (auto& it : client_data_) {
212       auto& client_data = it.second;
213       client_data.sock->Shutdown(false);
214     }
215     client_data_.clear();
216 
217     std::lock_guard<std::mutex> inner_lock(mutex);
218     done = true;
219     cv.notify_one();
220   });
221   cv.wait(lock, [&done] { return done; });
222 }
223 
OnDisconnect(base::UnixSocket * self)224 void UnwindingWorker::OnDisconnect(base::UnixSocket* self) {
225   pid_t peer_pid = self->peer_pid_linux();
226   auto it = client_data_.find(peer_pid);
227   if (it == client_data_.end()) {
228     PERFETTO_DFATAL_OR_ELOG("Disconnected unexpected socket.");
229     return;
230   }
231 
232   ClientData& client_data = it->second;
233   ReadAndUnwindBatch(&client_data);
234   SharedRingBuffer& shmem = client_data.shmem;
235 
236   if (!client_data.free_records.empty()) {
237     delegate_->PostFreeRecord(this, std::move(client_data.free_records));
238     client_data.free_records.clear();
239   }
240 
241   SharedRingBuffer::Stats stats = {};
242   {
243     auto lock = shmem.AcquireLock(ScopedSpinlock::Mode::Try);
244     if (lock.locked())
245       stats = shmem.GetStats(lock);
246     else
247       PERFETTO_ELOG("Failed to log shmem to get stats.");
248   }
249   DataSourceInstanceID ds_id = client_data.data_source_instance_id;
250 
251   client_data_.erase(it);
252   // The erase invalidates the self pointer.
253   self = nullptr;
254   if (client_data_.empty()) {
255     // We got rid of the last client. Flush and destruct AllocRecords in
256     // arena. Disable the arena (will not accept returning borrowed records)
257     // in case there are pending AllocRecords on the main thread.
258     alloc_record_arena_.Disable();
259   }
260   delegate_->PostSocketDisconnected(this, ds_id, peer_pid, stats);
261 }
262 
OnDataAvailable(base::UnixSocket * self)263 void UnwindingWorker::OnDataAvailable(base::UnixSocket* self) {
264   // Drain buffer to clear the notification.
265   char recv_buf[kUnwindBatchSize];
266   self->Receive(recv_buf, sizeof(recv_buf));
267   BatchUnwindJob(self->peer_pid_linux());
268 }
269 
ReadAndUnwindBatch(ClientData * client_data)270 UnwindingWorker::ReadAndUnwindBatchResult UnwindingWorker::ReadAndUnwindBatch(
271     ClientData* client_data) {
272   SharedRingBuffer& shmem = client_data->shmem;
273   SharedRingBuffer::Buffer buf;
274 
275   size_t i;
276   for (i = 0; i < kUnwindBatchSize; ++i) {
277     uint64_t reparses_before = client_data->metadata.reparses;
278     buf = shmem.BeginRead();
279     if (!buf)
280       break;
281     HandleBuffer(this, &alloc_record_arena_, buf, client_data,
282                  client_data->sock->peer_pid_linux(), delegate_);
283     shmem.EndRead(std::move(buf));
284     // Reparsing takes time, so process the rest in a new batch to avoid timing
285     // out.
286     if (reparses_before < client_data->metadata.reparses) {
287       return ReadAndUnwindBatchResult::kHasMore;
288     }
289   }
290 
291   if (i == kUnwindBatchSize) {
292     return ReadAndUnwindBatchResult::kHasMore;
293   } else if (i > 0) {
294     return ReadAndUnwindBatchResult::kReadSome;
295   } else {
296     return ReadAndUnwindBatchResult::kReadNone;
297   }
298 }
299 
BatchUnwindJob(pid_t peer_pid)300 void UnwindingWorker::BatchUnwindJob(pid_t peer_pid) {
301   auto it = client_data_.find(peer_pid);
302   if (it == client_data_.end()) {
303     // This can happen if the client disconnected before the buffer was fully
304     // handled.
305     PERFETTO_DLOG("Unexpected data.");
306     return;
307   }
308 
309   bool job_reposted = false;
310   bool reader_paused = false;
311   ClientData& client_data = it->second;
312   switch (ReadAndUnwindBatch(&client_data)) {
313     case ReadAndUnwindBatchResult::kHasMore:
314       thread_task_runner_.get()->PostTask(
315           [this, peer_pid] { BatchUnwindJob(peer_pid); });
316       job_reposted = true;
317       break;
318     case ReadAndUnwindBatchResult::kReadSome:
319       thread_task_runner_.get()->PostDelayedTask(
320           [this, peer_pid] { BatchUnwindJob(peer_pid); }, kRetryDelayMs);
321       job_reposted = true;
322       break;
323     case ReadAndUnwindBatchResult::kReadNone:
324       client_data.shmem.SetReaderPaused();
325       reader_paused = true;
326       break;
327   }
328 
329   // We need to either repost the job, or set the reader paused bit. By
330   // setting that bit, we inform the client that we want to be notified when
331   // new data is written to the shared memory buffer.
332   // If we do neither of these things, we will not read from the shared memory
333   // buffer again.
334   PERFETTO_CHECK(job_reposted || reader_paused);
335 }
336 
337 // static
HandleBuffer(UnwindingWorker * self,AllocRecordArena * alloc_record_arena,const SharedRingBuffer::Buffer & buf,ClientData * client_data,pid_t peer_pid,Delegate * delegate)338 void UnwindingWorker::HandleBuffer(UnwindingWorker* self,
339                                    AllocRecordArena* alloc_record_arena,
340                                    const SharedRingBuffer::Buffer& buf,
341                                    ClientData* client_data,
342                                    pid_t peer_pid,
343                                    Delegate* delegate) {
344   UnwindingMetadata* unwinding_metadata = &client_data->metadata;
345   DataSourceInstanceID data_source_instance_id =
346       client_data->data_source_instance_id;
347   WireMessage msg;
348   // TODO(fmayer): standardise on char* or uint8_t*.
349   // char* has stronger guarantees regarding aliasing.
350   // see https://timsong-cpp.github.io/cppwp/n3337/basic.lval#10.8
351   if (!ReceiveWireMessage(reinterpret_cast<char*>(buf.data), buf.size, &msg)) {
352     PERFETTO_DFATAL_OR_ELOG("Failed to receive wire message.");
353     return;
354   }
355 
356   if (msg.record_type == RecordType::Malloc) {
357     std::unique_ptr<AllocRecord> rec = alloc_record_arena->BorrowAllocRecord();
358     rec->alloc_metadata = *msg.alloc_header;
359     rec->pid = peer_pid;
360     rec->data_source_instance_id = data_source_instance_id;
361     auto start_time_us = base::GetWallTimeNs() / 1000;
362     if (!client_data->stream_allocations)
363       DoUnwind(&msg, unwinding_metadata, rec.get());
364     rec->unwinding_time_us = static_cast<uint64_t>(
365         ((base::GetWallTimeNs() / 1000) - start_time_us).count());
366     delegate->PostAllocRecord(self, std::move(rec));
367   } else if (msg.record_type == RecordType::Free) {
368     FreeRecord rec;
369     rec.pid = peer_pid;
370     rec.data_source_instance_id = data_source_instance_id;
371     // We need to copy this, so we can return the memory to the shmem buffer.
372     memcpy(&rec.entry, msg.free_header, sizeof(*msg.free_header));
373     client_data->free_records.emplace_back(std::move(rec));
374     if (client_data->free_records.size() == kRecordBatchSize) {
375       delegate->PostFreeRecord(self, std::move(client_data->free_records));
376       client_data->free_records.clear();
377       client_data->free_records.reserve(kRecordBatchSize);
378     }
379   } else if (msg.record_type == RecordType::HeapName) {
380     HeapNameRecord rec;
381     rec.pid = peer_pid;
382     rec.data_source_instance_id = data_source_instance_id;
383     memcpy(&rec.entry, msg.heap_name_header, sizeof(*msg.heap_name_header));
384     rec.entry.heap_name[sizeof(rec.entry.heap_name) - 1] = '\0';
385     delegate->PostHeapNameRecord(self, std::move(rec));
386   } else {
387     PERFETTO_DFATAL_OR_ELOG("Invalid record type.");
388   }
389 }
390 
PostHandoffSocket(HandoffData handoff_data)391 void UnwindingWorker::PostHandoffSocket(HandoffData handoff_data) {
392   // Even with C++14, this cannot be moved, as std::function has to be
393   // copyable, which HandoffData is not.
394   HandoffData* raw_data = new HandoffData(std::move(handoff_data));
395   // We do not need to use a WeakPtr here because the task runner will not
396   // outlive its UnwindingWorker.
397   thread_task_runner_.get()->PostTask([this, raw_data] {
398     HandoffData data = std::move(*raw_data);
399     delete raw_data;
400     HandleHandoffSocket(std::move(data));
401   });
402 }
403 
HandleHandoffSocket(HandoffData handoff_data)404 void UnwindingWorker::HandleHandoffSocket(HandoffData handoff_data) {
405   auto sock = base::UnixSocket::AdoptConnected(
406       handoff_data.sock.ReleaseFd(), this, this->thread_task_runner_.get(),
407       base::SockFamily::kUnix, base::SockType::kStream);
408   pid_t peer_pid = sock->peer_pid_linux();
409 
410   UnwindingMetadata metadata(std::move(handoff_data.maps_fd),
411                              std::move(handoff_data.mem_fd));
412   ClientData client_data{
413       handoff_data.data_source_instance_id,
414       std::move(sock),
415       std::move(metadata),
416       std::move(handoff_data.shmem),
417       std::move(handoff_data.client_config),
418       handoff_data.stream_allocations,
419       {},
420   };
421   client_data.free_records.reserve(kRecordBatchSize);
422   client_data.shmem.SetReaderPaused();
423   client_data_.emplace(peer_pid, std::move(client_data));
424   alloc_record_arena_.Enable();
425 }
426 
PostDisconnectSocket(pid_t pid)427 void UnwindingWorker::PostDisconnectSocket(pid_t pid) {
428   // We do not need to use a WeakPtr here because the task runner will not
429   // outlive its UnwindingWorker.
430   thread_task_runner_.get()->PostTask(
431       [this, pid] { HandleDisconnectSocket(pid); });
432 }
433 
HandleDisconnectSocket(pid_t pid)434 void UnwindingWorker::HandleDisconnectSocket(pid_t pid) {
435   auto it = client_data_.find(pid);
436   if (it == client_data_.end()) {
437     // This is expected if the client voluntarily disconnects before the
438     // profiling session ended. In that case, there is a race between the main
439     // thread learning about the disconnect and it calling back here.
440     return;
441   }
442   ClientData& client_data = it->second;
443   // Shutdown and call OnDisconnect handler.
444   client_data.shmem.SetShuttingDown();
445   client_data.sock->Shutdown(/* notify= */ true);
446 }
447 
BorrowAllocRecord()448 std::unique_ptr<AllocRecord> AllocRecordArena::BorrowAllocRecord() {
449   std::lock_guard<std::mutex> l(*alloc_records_mutex_);
450   if (!alloc_records_.empty()) {
451     std::unique_ptr<AllocRecord> result = std::move(alloc_records_.back());
452     alloc_records_.pop_back();
453     return result;
454   }
455   return std::unique_ptr<AllocRecord>(new AllocRecord());
456 }
457 
ReturnAllocRecord(std::unique_ptr<AllocRecord> record)458 void AllocRecordArena::ReturnAllocRecord(std::unique_ptr<AllocRecord> record) {
459   std::lock_guard<std::mutex> l(*alloc_records_mutex_);
460   if (enabled_ && record && alloc_records_.size() < kMaxAllocRecordArenaSize)
461     alloc_records_.emplace_back(std::move(record));
462 }
463 
Disable()464 void AllocRecordArena::Disable() {
465   std::lock_guard<std::mutex> l(*alloc_records_mutex_);
466   alloc_records_.clear();
467   enabled_ = false;
468 }
469 
Enable()470 void AllocRecordArena::Enable() {
471   std::lock_guard<std::mutex> l(*alloc_records_mutex_);
472   enabled_ = true;
473 }
474 
475 UnwindingWorker::Delegate::~Delegate() = default;
476 
477 }  // namespace profiling
478 }  // namespace perfetto
479