/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_
18 #define SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_
19 
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "perfetto/base/task_runner.h"
#include "perfetto/ext/base/optional.h"
#include "perfetto/ext/base/unix_socket.h"
#include "perfetto/ext/base/unix_task_runner.h"

#include "perfetto/ext/tracing/core/basic_types.h"
#include "perfetto/ext/tracing/core/producer.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "perfetto/ext/tracing/core/tracing_service.h"
#include "perfetto/tracing/core/data_source_config.h"

#include "src/profiling/common/interning_output.h"
#include "src/profiling/common/proc_utils.h"
#include "src/profiling/memory/bookkeeping.h"
#include "src/profiling/memory/bookkeeping_dump.h"
#include "src/profiling/memory/page_idle_checker.h"
#include "src/profiling/memory/system_property.h"
#include "src/profiling/memory/unwinding.h"

#include "protos/perfetto/config/profiling/heapprofd_config.gen.h"
45 
46 namespace perfetto {
47 namespace profiling {
48 
49 using HeapprofdConfig = protos::gen::HeapprofdConfig;
50 
// Identifies a process by pid and command line.
//
// Deliberately kept a plain aggregate with no default member initializers:
// HeapprofdProducer::target_process_ is brace-initialized as {pid, cmdline}.
struct Process {
  // Process id.
  pid_t pid;
  // Command line of the process.
  std::string cmdline;
};
55 
// Histogram of uint64_t samples over a fixed number (kBuckets) of buckets.
//
// The value-to-bucket mapping (GetBucket) and kMaxBucket are defined out of
// line. NOTE(review): the name suggests logarithmically sized buckets; confirm
// against the definition of GetBucket().
class LogHistogram {
 public:
  // Defined out of line.
  static const uint64_t kMaxBucket;
  // Number of buckets, i.e. the size of |values_|.
  static constexpr size_t kBuckets = 20;

  // Increments the counter of the bucket that |value| maps to.
  void Add(uint64_t value) { values_[GetBucket(value)]++; }

  // Returns the histogram contents as pairs of uint64_t.
  // NOTE(review): presumably (bucket bound, count) pairs; confirm against the
  // out-of-line definition.
  std::vector<std::pair<uint64_t, uint64_t>> GetData();

 private:
  // Maps |value| to an index into |values_|. Add() uses the result unchecked,
  // so the definition must return a value smaller than kBuckets.
  size_t GetBucket(uint64_t value);

  // Per-bucket counters, zero-initialized.
  std::array<uint64_t, kBuckets> values_ = {};
};
69 
// Mode in which a HeapprofdProducer is instantiated.
//
// TODO(rsavitski): central daemon can do less work if it knows that the global
// operating mode is fork-based, as it then will not be interacting with the
// clients. This can be implemented as an additional mode here.
enum class HeapprofdMode {
  // Producer instantiated by the system heapprofd daemon.
  kCentral,
  // Producer instantiated by a private heapprofd forked for a single target
  // process (fork mode).
  kChild
};
74 
75 // Heap profiling producer. Can be instantiated in two modes, central and
76 // child (also referred to as fork mode).
77 //
78 // The central mode producer is instantiated by the system heapprofd daemon. Its
79 // primary responsibility is activating profiling (via system properties and
80 // signals) in targets identified by profiling configs. On debug platform
81 // builds, the central producer can also handle the out-of-process unwinding &
82 // writing of the profiles for all client processes.
83 //
84 // An alternative model is where the central heapprofd triggers the profiling in
85 // the target process, but the latter fork-execs a private heapprofd binary to
86 // handle unwinding only for that process. The forked heapprofd instantiates
87 // this producer in the "child" mode. In this scenario, the profiled process
88 // never talks to the system daemon.
89 //
90 // TODO(fmayer||rsavitski): cover interesting invariants/structure of the
91 // implementation (e.g. number of data sources in child mode), including
92 // threading structure.
93 class HeapprofdProducer : public Producer, public UnwindingWorker::Delegate {
94  public:
95   friend class SocketDelegate;
96 
97   // TODO(fmayer): Split into two delegates for the listening socket in kCentral
98   // and for the per-client sockets to make this easier to understand?
99   // Alternatively, find a better name for this.
100   class SocketDelegate : public base::UnixSocket::EventListener {
101    public:
SocketDelegate(HeapprofdProducer * producer)102     SocketDelegate(HeapprofdProducer* producer) : producer_(producer) {}
103 
104     void OnDisconnect(base::UnixSocket* self) override;
105     void OnNewIncomingConnection(
106         base::UnixSocket* self,
107         std::unique_ptr<base::UnixSocket> new_connection) override;
108     void OnDataAvailable(base::UnixSocket* self) override;
109 
110    private:
111     HeapprofdProducer* producer_;
112   };
113 
114   HeapprofdProducer(HeapprofdMode mode, base::TaskRunner* task_runner);
115   ~HeapprofdProducer() override;
116 
117   // Producer Impl:
118   void OnConnect() override;
119   void OnDisconnect() override;
120   void SetupDataSource(DataSourceInstanceID, const DataSourceConfig&) override;
121   void StartDataSource(DataSourceInstanceID, const DataSourceConfig&) override;
122   void StopDataSource(DataSourceInstanceID) override;
123   void OnTracingSetup() override;
124   void Flush(FlushRequestID,
125              const DataSourceInstanceID* data_source_ids,
126              size_t num_data_sources) override;
ClearIncrementalState(const DataSourceInstanceID *,size_t)127   void ClearIncrementalState(const DataSourceInstanceID* /*data_source_ids*/,
128                              size_t /*num_data_sources*/) override {}
129 
130   // TODO(fmayer): Refactor once/if we have generic reconnect logic.
131   void ConnectWithRetries(const char* socket_name);
132   void DumpAll();
133 
134   // UnwindingWorker::Delegate impl:
135   void PostAllocRecord(AllocRecord) override;
136   void PostFreeRecord(FreeRecord) override;
137   void PostSocketDisconnected(DataSourceInstanceID,
138                               pid_t,
139                               SharedRingBuffer::Stats) override;
140 
141   void HandleAllocRecord(AllocRecord);
142   void HandleFreeRecord(FreeRecord);
143   void HandleSocketDisconnected(DataSourceInstanceID,
144                                 pid_t,
145                                 SharedRingBuffer::Stats);
146 
147   // Valid only if mode_ == kChild.
148   void SetTargetProcess(pid_t target_pid,
149                         std::string target_cmdline,
150                         base::ScopedFile inherited_socket);
151   // Valid only if mode_ == kChild. Kicks off a periodic check that the child
152   // heapprofd is actively working on a data source (which should correspond to
153   // the target process). The first check is delayed to let the freshly spawned
154   // producer get the data sources from the tracing service (i.e. traced).
155   void ScheduleActiveDataSourceWatchdog();
156 
157   // Exposed for testing.
158   void SetProducerEndpoint(
159       std::unique_ptr<TracingService::ProducerEndpoint> endpoint);
160 
socket_delegate()161   base::UnixSocket::EventListener& socket_delegate() {
162     return socket_delegate_;
163   }
164 
165  private:
166   // State of the connection to tracing service (traced).
167   enum State {
168     kNotStarted = 0,
169     kNotConnected,
170     kConnecting,
171     kConnected,
172   };
173 
174   struct ProcessState {
ProcessStateProcessState175     ProcessState(GlobalCallstackTrie* callsites, bool dump_at_max_mode)
176         : heap_tracker(callsites, dump_at_max_mode) {}
177     bool disconnected = false;
178     bool buffer_overran = false;
179     bool buffer_corrupted = false;
180 
181     uint64_t heap_samples = 0;
182     uint64_t map_reparses = 0;
183     uint64_t unwinding_errors = 0;
184 
185     uint64_t total_unwinding_time_us = 0;
186     LogHistogram unwinding_time_us;
187     HeapTracker heap_tracker;
188 
189     base::Optional<PageIdleChecker> page_idle_checker;
190   };
191 
192   struct DataSource {
DataSourceDataSource193     DataSource(std::unique_ptr<TraceWriter> tw) : trace_writer(std::move(tw)) {}
194 
195     DataSourceInstanceID id;
196     std::unique_ptr<TraceWriter> trace_writer;
197     HeapprofdConfig config;
198     ClientConfiguration client_configuration;
199     std::vector<SystemProperties::Handle> properties;
200     std::set<pid_t> signaled_pids;
201     std::set<pid_t> rejected_pids;
202     std::map<pid_t, ProcessState> process_states;
203     std::vector<std::string> normalized_cmdlines;
204     InterningOutputTracker intern_state;
205     bool shutting_down = false;
206     bool started = false;
207     bool hit_guardrail = false;
208     bool was_stopped = false;
209     uint32_t stop_timeout_ms;
210     base::Optional<uint64_t> start_cputime_sec;
211   };
212 
213   struct PendingProcess {
214     std::unique_ptr<base::UnixSocket> sock;
215     DataSourceInstanceID data_source_instance_id;
216     SharedRingBuffer shmem;
217   };
218 
219   void HandleClientConnection(std::unique_ptr<base::UnixSocket> new_connection,
220                               Process process);
221 
222   void ConnectService();
223   void Restart();
224   void ResetConnectionBackoff();
225   void IncreaseConnectionBackoff();
226 
227   base::Optional<uint64_t> GetCputimeSec();
228 
229   void CheckDataSourceMemory();
230   void CheckDataSourceCpu();
231 
232   void FinishDataSourceFlush(FlushRequestID flush_id);
233   bool DumpProcessesInDataSource(DataSourceInstanceID id);
234   void DumpProcessState(DataSource* ds, pid_t pid, ProcessState* process);
235 
236   void DoContinuousDump(DataSourceInstanceID id, uint32_t dump_interval);
237 
238   UnwindingWorker& UnwinderForPID(pid_t);
239   bool IsPidProfiled(pid_t);
240   DataSource* GetDataSourceForProcess(const Process& proc);
241   void RecordOtherSourcesAsRejected(DataSource* active_ds, const Process& proc);
242 
243   void SetStartupProperties(DataSource* data_source);
244   void SignalRunningProcesses(DataSource* data_source);
245 
246   // Specific to mode_ == kChild
247   void TerminateProcess(int exit_status);
248   // Specific to mode_ == kChild
249   void ActiveDataSourceWatchdogCheck();
250   // Adopts the (connected) sockets inherited from the target process, invoking
251   // the on-connection callback.
252   // Specific to mode_ == kChild
253   void AdoptTargetProcessSocket();
254 
255   void ShutdownDataSource(DataSource* ds);
256   bool MaybeFinishDataSource(DataSource* ds);
257 
258   // Class state:
259 
260   // Task runner is owned by the main thread.
261   base::TaskRunner* const task_runner_;
262   const HeapprofdMode mode_;
263 
264   // State of connection to the tracing service.
265   State state_ = kNotStarted;
266   uint32_t connection_backoff_ms_ = 0;
267   const char* producer_sock_name_ = nullptr;
268 
269   // Client processes that have connected, but with which we have not yet
270   // finished the handshake.
271   std::map<pid_t, PendingProcess> pending_processes_;
272 
273   // Must outlive data_sources_ - owns at least the shared memory referenced by
274   // TraceWriters.
275   std::unique_ptr<TracingService::ProducerEndpoint> endpoint_;
276 
277   // Must outlive data_sources_ - HeapTracker references the trie.
278   GlobalCallstackTrie callsites_;
279 
280   // Must outlive data_sources_ - DataSource can hold
281   // SystemProperties::Handle-s.
282   // Specific to mode_ == kCentral
283   SystemProperties properties_;
284 
285   std::map<FlushRequestID, size_t> flushes_in_progress_;
286   std::map<DataSourceInstanceID, DataSource> data_sources_;
287   std::vector<UnwindingWorker> unwinding_workers_;
288 
289   // Specific to mode_ == kChild
290   Process target_process_{base::kInvalidPid, ""};
291   // This is a valid FD only between SetTargetProcess and
292   // AdoptTargetProcessSocket.
293   // Specific to mode_ == kChild
294   base::ScopedFile inherited_fd_;
295 
296   SocketDelegate socket_delegate_;
297   base::ScopedFile stat_fd_;
298 
299   base::WeakPtrFactory<HeapprofdProducer> weak_factory_;  // Keep last.
300 };
301 
302 }  // namespace profiling
303 }  // namespace perfetto
304 
305 #endif  // SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_
306