/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SRC_PROFILING_PERF_UNWINDING_H_
#define SRC_PROFILING_PERF_UNWINDING_H_

#include <stdint.h>
#include <condition_variable>
#include <map>
#include <optional>
#include <thread>

#include <linux/perf_event.h>
#include <unwindstack/Error.h>

#include "perfetto/base/flat_set.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/thread_checker.h"
#include "perfetto/ext/base/unix_task_runner.h"
#include "perfetto/ext/tracing/core/basic_types.h"
#include "src/kallsyms/kernel_symbol_map.h"
#include "src/kallsyms/lazy_kernel_symbolizer.h"
#include "src/profiling/common/unwind_support.h"
#include "src/profiling/perf/common_types.h"
#include "src/profiling/perf/unwind_queue.h"

namespace perfetto {
namespace profiling {

constexpr static uint32_t kUnwindQueueCapacity = 1024;

// Unwinds and symbolises callstacks. For userspace this uses the sampled stack
// and register state (see |ParsedSample|). For kernelspace, the kernel itself
// unwinds the stack (recording a list of instruction pointers), so only
// symbolisation using /proc/kallsyms is necessary. Has a single unwinding ring
// queue, shared across all data sources.
//
// Userspace samples cannot be unwound without having /proc/<pid>/{maps,mem}
// file descriptors for that process. This lookup can be asynchronous (e.g. on
// Android), so the unwinder might have to wait before it can process (or
// discard) some of the enqueued samples. To avoid blocking the entire queue,
// the unwinder is allowed to process the entries out of order.
//
// Besides the queue, all interactions between the unwinder and the rest of the
// producer logic are through posted tasks.
//
// As unwinding times are long-tailed (example measurements: median <1ms,
// worst-case ~1000ms), the unwinder runs on a dedicated thread to avoid
// starving the rest of the producer's work (including IPC and consumption of
// records from the kernel ring buffers).
//
// This class should not be instantiated directly, use the |UnwinderHandle|
// below instead.
//
// TODO(rsavitski): while the inputs to the unwinder are batched as a result of
// the reader posting a wakeup only after consuming a batch of kernel samples,
// the Unwinder might be staggering wakeups for the producer thread by posting
// a task every time a sample has been unwound. Evaluate how bad these wakeups
// are in practice, and consider also implementing a batching strategy for the
// unwinder->serialization handoff (which isn't very latency-sensitive).
class Unwinder {
 public:
  friend class UnwinderHandle;

  // Callbacks from the unwinder to the primary producer thread.
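  // The "Post" prefix reflects that, per the class comment above, all
  // cross-thread interaction is expected to happen via posted tasks rather
  // than synchronous calls on the unwinder thread. A minimal sketch of a
  // delegate implementation (ProducerDelegate, task_runner_ and EmitSample
  // are illustrative names, not part of this header):
  //
  //   class ProducerDelegate : public Unwinder::Delegate {
  //    public:
  //     void PostEmitSample(DataSourceInstanceID ds_id,
  //                         CompletedSample sample) override {
  //       // Bounce the completed sample onto the producer thread before
  //       // serializing it.
  //       task_runner_->PostTask(
  //           [this, ds_id, s = std::move(sample)]() mutable {
  //             EmitSample(ds_id, std::move(s));
  //           });
  //     }
  //     // PostEmitUnwinderSkippedSample and PostFinishDataSourceStop follow
  //     // the same pattern.
  //    private:
  //     base::TaskRunner* task_runner_;  // primary producer thread's runner
  //   };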
  class Delegate {
   public:
    virtual void PostEmitSample(DataSourceInstanceID ds_id,
                                CompletedSample sample) = 0;
    virtual void PostEmitUnwinderSkippedSample(DataSourceInstanceID ds_id,
                                               ParsedSample sample) = 0;
    virtual void PostFinishDataSourceStop(DataSourceInstanceID ds_id) = 0;

    virtual ~Delegate();
  };

  ~Unwinder() { PERFETTO_DCHECK_THREAD(thread_checker_); }

  void PostStartDataSource(DataSourceInstanceID ds_id, bool kernel_frames);
  void PostAdoptProcDescriptors(DataSourceInstanceID ds_id,
                                pid_t pid,
                                base::ScopedFile maps_fd,
                                base::ScopedFile mem_fd);
  void PostRecordTimedOutProcDescriptors(DataSourceInstanceID ds_id,
                                         pid_t pid);
  void PostRecordNoUserspaceProcess(DataSourceInstanceID ds_id, pid_t pid);
  void PostProcessQueue();
  void PostInitiateDataSourceStop(DataSourceInstanceID ds_id);
  void PostPurgeDataSource(DataSourceInstanceID ds_id);

  void PostClearCachedStatePeriodic(DataSourceInstanceID ds_id,
                                    uint32_t period_ms);

  UnwindQueue<UnwindEntry, kUnwindQueueCapacity>& unwind_queue() {
    return unwind_queue_;
  }

  uint64_t GetEnqueuedFootprint() {
    uint64_t freed =
        footprint_tracker_.stack_bytes_freed.load(std::memory_order_acquire);
    uint64_t allocated = footprint_tracker_.stack_bytes_allocated.load(
        std::memory_order_relaxed);

    // overflow not a concern in practice
    PERFETTO_DCHECK(allocated >= freed);
    return allocated - freed;
  }

  void IncrementEnqueuedFootprint(uint64_t increment) {
    footprint_tracker_.stack_bytes_allocated.fetch_add(
        increment, std::memory_order_relaxed);
  }

 private:
  struct ProcessState {
    // kInitial: unwinder waiting for more info on the process (proc-fds, their
    // lookup expiration, or that there is no need for them).
    // kFdsResolved: proc-fds available, can unwind samples.
    // kFdsTimedOut: proc-fd lookup timed out, will discard samples. Can still
    // transition to kFdsResolved if the fds are received later.
    // kNoUserspace: only handling kernel callchains (the sample might
    // still be for a userspace process), can process samples.
    enum class Status { kInitial, kFdsResolved, kFdsTimedOut, kNoUserspace };

    Status status = Status::kInitial;
    // Present iff status == kFdsResolved.
    std::optional<UnwindingMetadata> unwind_state;
    // Used to distinguish first-time unwinding attempts for a process, for
    // logging purposes.
    bool attempted_unwinding = false;
  };

  struct DataSourceState {
    enum class Status { kActive, kShuttingDown };

    Status status = Status::kActive;
    std::map<pid_t, ProcessState> process_states;
  };

  // Accounting for how much heap memory is attached to the enqueued samples at
  // a given time. Read by the main thread, mutated by both threads.
  // We track just the heap allocated for the sampled stacks, as it dominates
  // the per-sample heap use.
  struct QueueFootprintTracker {
    std::atomic<uint64_t> stack_bytes_allocated;
    std::atomic<uint64_t> stack_bytes_freed;
  };

  // Must be instantiated via the |UnwinderHandle|.
  Unwinder(Delegate* delegate, base::UnixTaskRunner* task_runner);

  // Marks the data source as valid and active at the unwinding stage.
  // Initializes kernel address symbolization if needed.
  void StartDataSource(DataSourceInstanceID ds_id, bool kernel_frames);

  void AdoptProcDescriptors(DataSourceInstanceID ds_id,
                            pid_t pid,
                            base::ScopedFile maps_fd,
                            base::ScopedFile mem_fd);
  void UpdateProcessStateStatus(DataSourceInstanceID ds_id,
                                pid_t pid,
                                ProcessState::Status new_status);

  // Primary task. Processes the enqueued samples using
  // |ConsumeAndUnwindReadySamples|, and re-evaluates data source state.
  void ProcessQueue();

  // Processes the enqueued samples for which all unwinding inputs are ready.
  // Returns the set of data source instances which still have samples pending
  // (i.e. waiting on the proc-fds).
  base::FlatSet<DataSourceInstanceID> ConsumeAndUnwindReadySamples();

  CompletedSample UnwindSample(const ParsedSample& sample,
                               UnwindingMetadata* opt_user_state,
                               bool pid_unwound_before);

  // Returns a list of symbolized kernel frames in the sample (if any).
  std::vector<unwindstack::FrameData> SymbolizeKernelCallchain(
      const ParsedSample& sample);

  // Marks the data source as shutting down at the unwinding stage. It is known
  // that no new samples for this source will be pushed into the queue, but we
  // need to delay the unwinder state teardown until all previously-enqueued
  // samples for this source are processed.
  void InitiateDataSourceStop(DataSourceInstanceID ds_id);

  // Tears down unwinding state for the data source without any outstanding
  // samples, and informs the service that it can continue the shutdown
  // sequence.
  void FinishDataSourceStop(DataSourceInstanceID ds_id);

  // Immediately destroys the data source state, used for abrupt stops.
  void PurgeDataSource(DataSourceInstanceID ds_id);

  void DecrementEnqueuedFootprint(uint64_t decrement) {
    footprint_tracker_.stack_bytes_freed.fetch_add(decrement,
                                                   std::memory_order_relaxed);
  }
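  // Intended flow for the footprint accounting above (a sketch inferred from
  // the interfaces in this header; the enqueueing side lives in the reader
  // code, not here): the thread that copies a sampled stack into an
  // |UnwindEntry| calls IncrementEnqueuedFootprint() with the stack size in
  // bytes, and the unwinder calls DecrementEnqueuedFootprint() with the same
  // size once that entry has been unwound or discarded, so
  // GetEnqueuedFootprint() approximates the heap currently pinned by pending
  // entries. For example (hypothetical sizes):
  //
  //   unwinder.IncrementEnqueuedFootprint(16 * 1024);  // reader thread
  //   ...
  //   DecrementEnqueuedFootprint(16 * 1024);           // unwinder thread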
  // Clears the parsed maps for all previously-sampled processes, and resets
  // the libunwindstack cache. This has the effect of deallocating the cached
  // Elf objects within libunwindstack, which take up non-trivial amounts of
  // memory.
  //
  // There are two reasons for having this operation:
  // * over a longer trace, it's desirable to drop heavy state for processes
  //   that haven't been sampled recently.
  // * since libunwindstack's cache is not bounded, it'll tend towards having
  //   state for all processes that are targeted by the profiling config.
  //   Clearing the cache periodically helps keep its footprint closer to the
  //   actual working set (NB: which might still be arbitrarily big, depending
  //   on the profiling config).
  //
  // After this function completes, the next unwind for each process will
  // therefore incur a guaranteed maps reparse.
  //
  // Unwinding for concurrent data sources will *not* be directly affected at
  // the time of writing, as the non-cleared parsed maps will keep the cached
  // Elf objects alive through shared_ptrs.
  //
  // Note that this operation is heavy in terms of cpu%, and should therefore
  // be called only for profiling configs that require it.
  //
  // TODO(rsavitski): dropping the full parsed maps is somewhat excessive,
  // could instead clear just the |MapInfo.elf| shared_ptr, but that's
  // considered too brittle as it's an implementation detail of libunwindstack.
  // TODO(rsavitski): improve libunwindstack cache's architecture (it is still
  // worth having at the moment to speed up unwinds across map reparses).
  void ClearCachedStatePeriodic(DataSourceInstanceID ds_id, uint32_t period_ms);

  void ResetAndEnableUnwindstackCache();

  base::UnixTaskRunner* const task_runner_;
  Delegate* const delegate_;
  UnwindQueue<UnwindEntry, kUnwindQueueCapacity> unwind_queue_;
  QueueFootprintTracker footprint_tracker_;
  std::map<DataSourceInstanceID, DataSourceState> data_sources_;
  LazyKernelSymbolizer kernel_symbolizer_;

  PERFETTO_THREAD_CHECKER(thread_checker_)
};

// Owning resource handle for an |Unwinder| with a dedicated task thread.
// Ensures that the |Unwinder| is constructed and destructed on the task
// thread.
// TODO(rsavitski): update base::ThreadTaskRunner to allow for this pattern of
// owned state, and consolidate.
class UnwinderHandle {
 public:
  explicit UnwinderHandle(Unwinder::Delegate* delegate) {
    std::mutex init_lock;
    std::condition_variable init_cv;

    std::function<void(base::UnixTaskRunner*, Unwinder*)> initializer =
        [this, &init_lock, &init_cv](base::UnixTaskRunner* task_runner,
                                     Unwinder* unwinder) {
          std::lock_guard<std::mutex> lock(init_lock);
          task_runner_ = task_runner;
          unwinder_ = unwinder;
          // Notify while still holding the lock, as init_cv ceases to exist as
          // soon as the main thread observes a non-null task_runner_, and it
          // can wake up spuriously (i.e. before the notify if we had unlocked
          // before notifying).
          init_cv.notify_one();
        };

    thread_ = std::thread(&UnwinderHandle::RunTaskThread, this,
                          std::move(initializer), delegate);

    std::unique_lock<std::mutex> lock(init_lock);
    init_cv.wait(lock, [this] { return !!task_runner_ && !!unwinder_; });
  }

  ~UnwinderHandle() {
    if (task_runner_) {
      PERFETTO_CHECK(!task_runner_->QuitCalled());
      task_runner_->Quit();

      PERFETTO_DCHECK(thread_.joinable());
    }
    if (thread_.joinable())
      thread_.join();
  }

  Unwinder* operator->() { return unwinder_; }

 private:
  void RunTaskThread(
      std::function<void(base::UnixTaskRunner*, Unwinder*)> initializer,
      Unwinder::Delegate* delegate) {
    base::UnixTaskRunner task_runner;
    Unwinder unwinder(delegate, &task_runner);
    task_runner.PostTask(
        std::bind(std::move(initializer), &task_runner, &unwinder));
    task_runner.Run();
  }

  std::thread thread_;
  base::UnixTaskRunner* task_runner_ = nullptr;
  Unwinder* unwinder_ = nullptr;
};

}  // namespace profiling
}  // namespace perfetto

#endif  // SRC_PROFILING_PERF_UNWINDING_H_