1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_PROFILING_PERF_PERF_PRODUCER_H_ 18 #define SRC_PROFILING_PERF_PERF_PRODUCER_H_ 19 20 #include <deque> 21 #include <map> 22 #include <queue> 23 24 #include <unistd.h> 25 26 #include <unwindstack/Error.h> 27 #include <unwindstack/Regs.h> 28 29 #include "perfetto/base/task_runner.h" 30 #include "perfetto/ext/base/optional.h" 31 #include "perfetto/ext/base/scoped_file.h" 32 #include "perfetto/ext/base/unix_socket.h" 33 #include "perfetto/ext/base/weak_ptr.h" 34 #include "perfetto/ext/tracing/core/basic_types.h" 35 #include "perfetto/ext/tracing/core/producer.h" 36 #include "perfetto/ext/tracing/core/trace_writer.h" 37 #include "perfetto/ext/tracing/core/tracing_service.h" 38 #include "src/profiling/common/callstack_trie.h" 39 #include "src/profiling/common/interning_output.h" 40 #include "src/profiling/common/unwind_support.h" 41 #include "src/profiling/perf/common_types.h" 42 #include "src/profiling/perf/event_config.h" 43 #include "src/profiling/perf/event_reader.h" 44 #include "src/profiling/perf/proc_descriptors.h" 45 #include "src/profiling/perf/unwinding.h" 46 #include "src/tracing/core/metatrace_writer.h" 47 // TODO(rsavitski): move to e.g. src/tracefs/. 48 #include "src/traced/probes/ftrace/ftrace_procfs.h" 49 50 namespace perfetto { 51 namespace profiling { 52 53 // TODO(rsavitski): describe the high-level architecture and threading. Rough 54 // summary in the mean time: three stages: (1) kernel buffer reader that parses 55 // the samples -> (2) callstack unwinder -> (3) interning and serialization of 56 // samples. This class handles stages (1) and (3) on the main thread. Unwinding 57 // is done by |Unwinder| on a dedicated thread. 58 class PerfProducer : public Producer, 59 public ProcDescriptorDelegate, 60 public Unwinder::Delegate { 61 public: 62 PerfProducer(ProcDescriptorGetter* proc_fd_getter, 63 base::TaskRunner* task_runner); 64 ~PerfProducer() override = default; 65 66 PerfProducer(const PerfProducer&) = delete; 67 PerfProducer& operator=(const PerfProducer&) = delete; 68 PerfProducer(PerfProducer&&) = delete; 69 PerfProducer& operator=(PerfProducer&&) = delete; 70 71 void ConnectWithRetries(const char* socket_name); 72 73 // Producer impl: 74 void OnConnect() override; 75 void OnDisconnect() override; OnTracingSetup()76 void OnTracingSetup() override {} 77 void SetupDataSource(DataSourceInstanceID, const DataSourceConfig&) override; 78 void StartDataSource(DataSourceInstanceID instance_id, 79 const DataSourceConfig& config) override; 80 void StopDataSource(DataSourceInstanceID instance_id) override; 81 void Flush(FlushRequestID flush_id, 82 const DataSourceInstanceID* data_source_ids, 83 size_t num_data_sources) override; 84 void ClearIncrementalState(const DataSourceInstanceID* data_source_ids, 85 size_t num_data_sources) override; 86 87 // ProcDescriptorDelegate impl: 88 void OnProcDescriptors(pid_t pid, 89 uid_t uid, 90 base::ScopedFile maps_fd, 91 base::ScopedFile mem_fd) override; 92 93 // Unwinder::Delegate impl (callbacks from unwinder): 94 void PostEmitSample(DataSourceInstanceID ds_id, 95 CompletedSample sample) override; 96 void PostEmitUnwinderSkippedSample(DataSourceInstanceID ds_id, 97 ParsedSample sample) override; 98 void PostFinishDataSourceStop(DataSourceInstanceID ds_id) override; 99 100 private: 101 // State of the producer's connection to tracing service (traced). 102 enum State { 103 kNotStarted = 0, 104 kNotConnected, 105 kConnecting, 106 kConnected, 107 }; 108 109 // Represents the data source scoped view of a process. Specifically: 110 // * whether the process is in scope of the tracing session (if the latter 111 // specifies a target filter). 112 // * the state of the (possibly asynchronous) lookup of /proc/<pid>/{maps,mem} 113 // file descriptors, which are necessary for callstack unwinding of samples. 114 enum class ProcessTrackingStatus { 115 kInitial, 116 kResolving, // waiting on proc-fd lookup 117 kResolved, // proc-fds obtained, and process considered relevant 118 kExpired, // proc-fd lookup timed out 119 kRejected // process not considered relevant for the data source 120 }; 121 122 struct DataSourceState { 123 enum class Status { kActive, kShuttingDown }; 124 DataSourceStateDataSourceState125 DataSourceState(EventConfig _event_config, 126 std::unique_ptr<TraceWriter> _trace_writer, 127 std::vector<EventReader> _per_cpu_readers) 128 : event_config(std::move(_event_config)), 129 trace_writer(std::move(_trace_writer)), 130 per_cpu_readers(std::move(_per_cpu_readers)) {} 131 132 Status status = Status::kActive; 133 const EventConfig event_config; 134 std::unique_ptr<TraceWriter> trace_writer; 135 // Indexed by cpu, vector never resized. 136 std::vector<EventReader> per_cpu_readers; 137 // Tracks the incremental state for interned entries. 138 InterningOutputTracker interning_output; 139 // Producer thread's view of sampled processes. This is the primary tracking 140 // structure, but a subset of updates are replicated to a similar structure 141 // in the |Unwinder|, which needs to track whether the necessary unwinding 142 // inputs for a given process' samples are ready. 143 std::map<pid_t, ProcessTrackingStatus> process_states; 144 145 // Command lines we have decided to unwind, up to a total of 146 // additional_cmdline_count values. 147 base::FlatSet<std::string> additional_cmdlines; 148 }; 149 150 // For |EmitSkippedSample|. 151 enum class SampleSkipReason { 152 kReadStage = 0, // discarded at read stage 153 kUnwindEnqueue, // discarded due to unwinder queue being full 154 kUnwindStage, // discarded at unwind stage 155 }; 156 157 void ConnectService(); 158 void Restart(); 159 void ResetConnectionBackoff(); 160 void IncreaseConnectionBackoff(); 161 162 // Periodic read task which reads a batch of samples from all kernel ring 163 // buffers associated with the given data source. 164 void TickDataSourceRead(DataSourceInstanceID ds_id); 165 // Returns *false* if the reader has caught up with the writer position, true 166 // otherwise. Return value is only useful if the underlying perf_event has 167 // been paused (to identify when the buffer is empty). |max_samples| is a cap 168 // on the amount of samples that will be parsed, which might be more than the 169 // number of underlying records (as there might be non-sample records). 170 bool ReadAndParsePerCpuBuffer(EventReader* reader, 171 uint64_t max_samples, 172 DataSourceInstanceID ds_id, 173 DataSourceState* ds); 174 175 void InitiateDescriptorLookup(DataSourceInstanceID ds_id, 176 pid_t pid, 177 uint32_t timeout_ms); 178 // Do not call directly, use |InitiateDescriptorLookup|. 179 void StartDescriptorLookup(DataSourceInstanceID ds_id, 180 pid_t pid, 181 uint32_t timeout_ms); 182 void EvaluateDescriptorLookupTimeout(DataSourceInstanceID ds_id, pid_t pid); 183 184 void EmitSample(DataSourceInstanceID ds_id, CompletedSample sample); 185 void EmitRingBufferLoss(DataSourceInstanceID ds_id, 186 size_t cpu, 187 uint64_t records_lost); 188 189 void PostEmitSkippedSample(DataSourceInstanceID ds_id, 190 ParsedSample sample, 191 SampleSkipReason reason); 192 // Emit a packet indicating that a sample was relevant, but skipped as it was 193 // considered to be not unwindable (e.g. the process no longer exists). 194 void EmitSkippedSample(DataSourceInstanceID ds_id, 195 ParsedSample sample, 196 SampleSkipReason reason); 197 198 // Starts the shutdown of the given data source instance, starting with 199 // pausing the reader frontend. Once the reader reaches the point where all 200 // kernel buffers have been fully consumed, it will notify the |Unwinder| to 201 // proceed with the shutdown sequence. The unwinder in turn will call back to 202 // this producer once there are no more outstanding samples for the data 203 // source at the unwinding stage. 204 void InitiateReaderStop(DataSourceState* ds); 205 // Destroys the state belonging to this instance, and acks the stop to the 206 // tracing service. 207 void FinishDataSourceStop(DataSourceInstanceID ds_id); 208 // Immediately destroys the data source state, and instructs the unwinder to 209 // do the same. This is used for abrupt stops. 210 void PurgeDataSource(DataSourceInstanceID ds_id); 211 212 // Immediately stops the data source if this daemon's overall memory footprint 213 // is above the given threshold. This periodic task is started only for data 214 // sources that specify a limit. 215 void CheckMemoryFootprintPeriodic(DataSourceInstanceID ds_id, 216 uint32_t max_daemon_memory_kb); 217 218 void StartMetatraceSource(DataSourceInstanceID ds_id, BufferID target_buffer); 219 220 // Task runner owned by the main thread. 221 base::TaskRunner* const task_runner_; 222 State state_ = kNotStarted; 223 const char* producer_socket_name_ = nullptr; 224 uint32_t connection_backoff_ms_ = 0; 225 226 // Valid and stable for the lifetime of this class. 227 ProcDescriptorGetter* const proc_fd_getter_; 228 229 // Owns shared memory, must outlive trace writing. 230 std::unique_ptr<TracingService::ProducerEndpoint> endpoint_; 231 232 // If multiple metatrace sources are enabled concurrently, 233 // only the first one becomes active. 234 std::map<DataSourceInstanceID, MetatraceWriter> metatrace_writers_; 235 236 // Interns callstacks across all data sources. 237 // TODO(rsavitski): for long profiling sessions, consider purging trie when it 238 // grows too large (at the moment purged only when no sources are active). 239 // TODO(rsavitski): interning sequences are monotonic for the lifetime of the 240 // daemon. Consider resetting them at safe points - possible when no sources 241 // are active, and tricky otherwise. In the latter case, it'll require 242 // emitting incremental sequence invalidation packets on all relevant 243 // sequences. 244 GlobalCallstackTrie callstack_trie_; 245 246 // State associated with perf-sampling data sources. 247 std::map<DataSourceInstanceID, DataSourceState> data_sources_; 248 249 // Unwinding stage, running on a dedicated thread. 250 UnwinderHandle unwinding_worker_; 251 252 // Used for tracepoint name -> id lookups. Initialized lazily, and in general 253 // best effort - can be null if tracefs isn't accessible. 254 std::unique_ptr<FtraceProcfs> tracefs_; 255 256 base::WeakPtrFactory<PerfProducer> weak_factory_; // keep last 257 }; 258 259 } // namespace profiling 260 } // namespace perfetto 261 262 #endif // SRC_PROFILING_PERF_PERF_PRODUCER_H_ 263