1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_ 18 #define SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_ 19 20 #include <array> 21 #include <functional> 22 #include <map> 23 #include <vector> 24 25 #include "perfetto/base/task_runner.h" 26 #include "perfetto/ext/base/optional.h" 27 #include "perfetto/ext/base/unix_socket.h" 28 #include "perfetto/ext/base/unix_task_runner.h" 29 30 #include "perfetto/ext/tracing/core/basic_types.h" 31 #include "perfetto/ext/tracing/core/producer.h" 32 #include "perfetto/ext/tracing/core/trace_writer.h" 33 #include "perfetto/ext/tracing/core/tracing_service.h" 34 #include "perfetto/tracing/core/data_source_config.h" 35 36 #include "src/profiling/common/interning_output.h" 37 #include "src/profiling/common/proc_utils.h" 38 #include "src/profiling/memory/bookkeeping.h" 39 #include "src/profiling/memory/bookkeeping_dump.h" 40 #include "src/profiling/memory/page_idle_checker.h" 41 #include "src/profiling/memory/system_property.h" 42 #include "src/profiling/memory/unwinding.h" 43 44 #include "protos/perfetto/config/profiling/heapprofd_config.gen.h" 45 46 namespace perfetto { 47 namespace profiling { 48 49 using HeapprofdConfig = protos::gen::HeapprofdConfig; 50 51 struct Process { 52 pid_t pid; 53 std::string cmdline; 54 }; 55 56 class LogHistogram { 57 public: 58 static const uint64_t kMaxBucket; 59 static constexpr size_t kBuckets = 20; 60 Add(uint64_t value)61 void Add(uint64_t value) { values_[GetBucket(value)]++; } 62 std::vector<std::pair<uint64_t, uint64_t>> GetData(); 63 64 private: 65 size_t GetBucket(uint64_t value); 66 67 std::array<uint64_t, kBuckets> values_ = {}; 68 }; 69 70 // TODO(rsavitski): central daemon can do less work if it knows that the global 71 // operating mode is fork-based, as it then will not be interacting with the 72 // clients. This can be implemented as an additional mode here. 73 enum class HeapprofdMode { kCentral, kChild }; 74 75 // Heap profiling producer. Can be instantiated in two modes, central and 76 // child (also referred to as fork mode). 77 // 78 // The central mode producer is instantiated by the system heapprofd daemon. Its 79 // primary responsibility is activating profiling (via system properties and 80 // signals) in targets identified by profiling configs. On debug platform 81 // builds, the central producer can also handle the out-of-process unwinding & 82 // writing of the profiles for all client processes. 83 // 84 // An alternative model is where the central heapprofd triggers the profiling in 85 // the target process, but the latter fork-execs a private heapprofd binary to 86 // handle unwinding only for that process. The forked heapprofd instantiates 87 // this producer in the "child" mode. In this scenario, the profiled process 88 // never talks to the system daemon. 89 // 90 // TODO(fmayer||rsavitski): cover interesting invariants/structure of the 91 // implementation (e.g. number of data sources in child mode), including 92 // threading structure. 93 class HeapprofdProducer : public Producer, public UnwindingWorker::Delegate { 94 public: 95 friend class SocketDelegate; 96 97 // TODO(fmayer): Split into two delegates for the listening socket in kCentral 98 // and for the per-client sockets to make this easier to understand? 99 // Alternatively, find a better name for this. 100 class SocketDelegate : public base::UnixSocket::EventListener { 101 public: SocketDelegate(HeapprofdProducer * producer)102 SocketDelegate(HeapprofdProducer* producer) : producer_(producer) {} 103 104 void OnDisconnect(base::UnixSocket* self) override; 105 void OnNewIncomingConnection( 106 base::UnixSocket* self, 107 std::unique_ptr<base::UnixSocket> new_connection) override; 108 void OnDataAvailable(base::UnixSocket* self) override; 109 110 private: 111 HeapprofdProducer* producer_; 112 }; 113 114 HeapprofdProducer(HeapprofdMode mode, base::TaskRunner* task_runner); 115 ~HeapprofdProducer() override; 116 117 // Producer Impl: 118 void OnConnect() override; 119 void OnDisconnect() override; 120 void SetupDataSource(DataSourceInstanceID, const DataSourceConfig&) override; 121 void StartDataSource(DataSourceInstanceID, const DataSourceConfig&) override; 122 void StopDataSource(DataSourceInstanceID) override; 123 void OnTracingSetup() override; 124 void Flush(FlushRequestID, 125 const DataSourceInstanceID* data_source_ids, 126 size_t num_data_sources) override; ClearIncrementalState(const DataSourceInstanceID *,size_t)127 void ClearIncrementalState(const DataSourceInstanceID* /*data_source_ids*/, 128 size_t /*num_data_sources*/) override {} 129 130 // TODO(fmayer): Refactor once/if we have generic reconnect logic. 131 void ConnectWithRetries(const char* socket_name); 132 void DumpAll(); 133 134 // UnwindingWorker::Delegate impl: 135 void PostAllocRecord(AllocRecord) override; 136 void PostFreeRecord(FreeRecord) override; 137 void PostSocketDisconnected(DataSourceInstanceID, 138 pid_t, 139 SharedRingBuffer::Stats) override; 140 141 void HandleAllocRecord(AllocRecord); 142 void HandleFreeRecord(FreeRecord); 143 void HandleSocketDisconnected(DataSourceInstanceID, 144 pid_t, 145 SharedRingBuffer::Stats); 146 147 // Valid only if mode_ == kChild. 148 void SetTargetProcess(pid_t target_pid, 149 std::string target_cmdline, 150 base::ScopedFile inherited_socket); 151 // Valid only if mode_ == kChild. Kicks off a periodic check that the child 152 // heapprofd is actively working on a data source (which should correspond to 153 // the target process). The first check is delayed to let the freshly spawned 154 // producer get the data sources from the tracing service (i.e. traced). 155 void ScheduleActiveDataSourceWatchdog(); 156 157 // Exposed for testing. 158 void SetProducerEndpoint( 159 std::unique_ptr<TracingService::ProducerEndpoint> endpoint); 160 socket_delegate()161 base::UnixSocket::EventListener& socket_delegate() { 162 return socket_delegate_; 163 } 164 165 private: 166 // State of the connection to tracing service (traced). 167 enum State { 168 kNotStarted = 0, 169 kNotConnected, 170 kConnecting, 171 kConnected, 172 }; 173 174 struct ProcessState { ProcessStateProcessState175 ProcessState(GlobalCallstackTrie* callsites, bool dump_at_max_mode) 176 : heap_tracker(callsites, dump_at_max_mode) {} 177 bool disconnected = false; 178 bool buffer_overran = false; 179 bool buffer_corrupted = false; 180 181 uint64_t heap_samples = 0; 182 uint64_t map_reparses = 0; 183 uint64_t unwinding_errors = 0; 184 185 uint64_t total_unwinding_time_us = 0; 186 LogHistogram unwinding_time_us; 187 HeapTracker heap_tracker; 188 189 base::Optional<PageIdleChecker> page_idle_checker; 190 }; 191 192 struct DataSource { DataSourceDataSource193 DataSource(std::unique_ptr<TraceWriter> tw) : trace_writer(std::move(tw)) {} 194 195 DataSourceInstanceID id; 196 std::unique_ptr<TraceWriter> trace_writer; 197 HeapprofdConfig config; 198 ClientConfiguration client_configuration; 199 std::vector<SystemProperties::Handle> properties; 200 std::set<pid_t> signaled_pids; 201 std::set<pid_t> rejected_pids; 202 std::map<pid_t, ProcessState> process_states; 203 std::vector<std::string> normalized_cmdlines; 204 InterningOutputTracker intern_state; 205 bool shutting_down = false; 206 bool started = false; 207 bool hit_guardrail = false; 208 bool was_stopped = false; 209 uint32_t stop_timeout_ms; 210 base::Optional<uint64_t> start_cputime_sec; 211 }; 212 213 struct PendingProcess { 214 std::unique_ptr<base::UnixSocket> sock; 215 DataSourceInstanceID data_source_instance_id; 216 SharedRingBuffer shmem; 217 }; 218 219 void HandleClientConnection(std::unique_ptr<base::UnixSocket> new_connection, 220 Process process); 221 222 void ConnectService(); 223 void Restart(); 224 void ResetConnectionBackoff(); 225 void IncreaseConnectionBackoff(); 226 227 base::Optional<uint64_t> GetCputimeSec(); 228 229 void CheckDataSourceMemory(); 230 void CheckDataSourceCpu(); 231 232 void FinishDataSourceFlush(FlushRequestID flush_id); 233 bool DumpProcessesInDataSource(DataSourceInstanceID id); 234 void DumpProcessState(DataSource* ds, pid_t pid, ProcessState* process); 235 236 void DoContinuousDump(DataSourceInstanceID id, uint32_t dump_interval); 237 238 UnwindingWorker& UnwinderForPID(pid_t); 239 bool IsPidProfiled(pid_t); 240 DataSource* GetDataSourceForProcess(const Process& proc); 241 void RecordOtherSourcesAsRejected(DataSource* active_ds, const Process& proc); 242 243 void SetStartupProperties(DataSource* data_source); 244 void SignalRunningProcesses(DataSource* data_source); 245 246 // Specific to mode_ == kChild 247 void TerminateProcess(int exit_status); 248 // Specific to mode_ == kChild 249 void ActiveDataSourceWatchdogCheck(); 250 // Adopts the (connected) sockets inherited from the target process, invoking 251 // the on-connection callback. 252 // Specific to mode_ == kChild 253 void AdoptTargetProcessSocket(); 254 255 void ShutdownDataSource(DataSource* ds); 256 bool MaybeFinishDataSource(DataSource* ds); 257 258 // Class state: 259 260 // Task runner is owned by the main thread. 261 base::TaskRunner* const task_runner_; 262 const HeapprofdMode mode_; 263 264 // State of connection to the tracing service. 265 State state_ = kNotStarted; 266 uint32_t connection_backoff_ms_ = 0; 267 const char* producer_sock_name_ = nullptr; 268 269 // Client processes that have connected, but with which we have not yet 270 // finished the handshake. 271 std::map<pid_t, PendingProcess> pending_processes_; 272 273 // Must outlive data_sources_ - owns at least the shared memory referenced by 274 // TraceWriters. 275 std::unique_ptr<TracingService::ProducerEndpoint> endpoint_; 276 277 // Must outlive data_sources_ - HeapTracker references the trie. 278 GlobalCallstackTrie callsites_; 279 280 // Must outlive data_sources_ - DataSource can hold 281 // SystemProperties::Handle-s. 282 // Specific to mode_ == kCentral 283 SystemProperties properties_; 284 285 std::map<FlushRequestID, size_t> flushes_in_progress_; 286 std::map<DataSourceInstanceID, DataSource> data_sources_; 287 std::vector<UnwindingWorker> unwinding_workers_; 288 289 // Specific to mode_ == kChild 290 Process target_process_{base::kInvalidPid, ""}; 291 // This is a valid FD only between SetTargetProcess and 292 // AdoptTargetProcessSocket. 293 // Specific to mode_ == kChild 294 base::ScopedFile inherited_fd_; 295 296 SocketDelegate socket_delegate_; 297 base::ScopedFile stat_fd_; 298 299 base::WeakPtrFactory<HeapprofdProducer> weak_factory_; // Keep last. 300 }; 301 302 } // namespace profiling 303 } // namespace perfetto 304 305 #endif // SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_ 306