// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_PROFILER_METADATA_RECORDER_H_
#define BASE_PROFILER_METADATA_RECORDER_H_

#include <array>
#include <atomic>
#include <cstdint>
#include <utility>

#include "base/base_export.h"
#include "base/memory/raw_ptr.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
#include "base/threading/platform_thread.h"
#include "third_party/abseil-cpp/absl/types/optional.h"

namespace base {

// MetadataRecorder provides a data structure to store metadata key/value
// pairs to be associated with samples taken by the sampling profiler.
// Whatever metadata is present in this map when a sample is recorded is then
// associated with the sample.
//
// Methods on this class are safe to call unsynchronized from arbitrary
// threads.
//
// This class was designed to read metadata from a single sampling thread and
// write metadata from many Chrome threads within the same process. These
// other threads might be suspended by the sampling thread at any time in
// order to collect a sample.
//
// This class has a few notable constraints:
//
// A) If a lock that's required to read the metadata might be held while
//    writing the metadata, that lock must be acquirable *before* the thread
//    is suspended. Otherwise, the sampling thread might suspend the target
//    thread while it is holding the required lock, causing deadlock.
//
//    Ramifications:
//
//    - When retrieving items, lock acquisition (through the MetadataProvider
//      constructor) and actual item retrieval (through
//      MetadataProvider::GetItems()) are separate.
//
// B) We can't allocate data on the heap while reading the metadata items.
//    This is because, on many operating systems, there's a process-wide heap
//    lock that is held while allocating on the heap. If a thread is
//    suspended while holding this lock and the sampling thread then tries to
//    allocate on the heap to read the metadata, it will deadlock trying to
//    acquire the heap lock.
//
//    Ramifications:
//
//    - We hold and retrieve the metadata using a fixed-size array, which
//      allows readers to preallocate the data structure in which we pass
//      back the metadata.
//
// C) We shouldn't guard writes with a lock that also guards reads, since the
//    read lock is held from the time that the sampling thread requests that
//    a thread be suspended up to the time that the thread is resumed. If all
//    metadata writes block their thread during that time, we're very likely
//    to block all Chrome threads.
//
//    Ramifications:
//
//    - We use two locks to guard the metadata: a read lock and a write lock.
//      Only the write lock is required to write into the metadata, and only
//      the read lock is required to read the metadata.
//
//    - Because we can't guard reads and writes with the same lock, we have
//      to face the possibility of writes occurring during a read. This is
//      especially problematic because there's no way to read both the key
//      and value for an item atomically without using mutexes, which
//      violates constraint A). If the sampling thread were to see the
//      following interleaving of reads and writes:
//
//        * Reader thread reads key for slot 0
//        * Writer thread removes item at slot 0
//        * Writer thread creates new item with different key in slot 0
//        * Reader thread reads value for slot 0
//
//      then the reader would see an invalid value for the given key. Because
//      of this possibility, we keep slots reserved for a specific key even
//      after that item has been removed. We reclaim these slots on a
//      best-effort basis during writes when the metadata recorder has become
//      sufficiently full and we can acquire the read lock.
//
//    - We use state stored in atomic data types to ensure that readers and
//      writers are synchronized about where data should be written to and
//      read from. We must use atomic data types to guarantee that there's no
//      instruction during a write after which the recorder is in an
//      inconsistent state that might yield garbage data for a reader.
//
// Here are a few of the many states the recorder can be in:
//
// - No thread is using the recorder.
//
// - A single writer is writing into the recorder without a simultaneous
//   read. The write will succeed.
//
// - A reader is reading from the recorder without a simultaneous write. The
//   read will succeed.
//
// - Multiple writers attempt to write into the recorder simultaneously. All
//   writers but one will block because only one can hold the write lock.
//
// - A writer is writing into the recorder, which hasn't reached the
//   threshold at which it will try to reclaim inactive slots. The writer
//   won't try to acquire the read lock to reclaim inactive slots. The reader
//   will therefore be able to immediately acquire the read lock, suspend the
//   target thread, and read the metadata.
//
// - A writer is writing into the recorder, the recorder has reached the
//   threshold at which it needs to reclaim inactive slots, and the writer
//   thread is now in the middle of reclaiming those slots when a reader
//   arrives. The reader will try to acquire the read lock before suspending
//   the thread but will block until the writer thread finishes reclamation
//   and releases the read lock. The reader will then be able to acquire the
//   read lock and suspend the target thread.
//
// - A reader is reading the recorder when a writer attempts to write. The
//   write will be successful. However, if the writer deems it necessary to
//   reclaim inactive slots, it will skip doing so because it won't be able
//   to acquire the read lock.
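//
// For illustration, a sampling thread's use of the recorder under the above
// constraints looks roughly like this. This is a sketch, not the actual
// sampler code; SuspendThread() and ResumeThread() are hypothetical
// stand-ins for the platform-specific thread suspension mechanism:
//
//   // Preallocated, so no heap allocation occurs while the target thread is
//   // suspended (constraint B).
//   base::MetadataRecorder::ItemArray items;
//
//   // Acquires the read lock *before* suspension (constraint A).
//   base::MetadataRecorder::MetadataProvider provider(&recorder, thread_id);
//   SuspendThread(target_thread);
//   size_t item_count = provider.GetItems(&items);
//   // ... copy the target thread's stack ...
//   ResumeThread(target_thread);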
class BASE_EXPORT MetadataRecorder {
 public:
  MetadataRecorder();
  virtual ~MetadataRecorder();
  MetadataRecorder(const MetadataRecorder&) = delete;
  MetadataRecorder& operator=(const MetadataRecorder&) = delete;

  struct BASE_EXPORT Item {
    Item(uint64_t name_hash,
         absl::optional<int64_t> key,
         absl::optional<PlatformThreadId> thread_id,
         int64_t value);
    Item();

    Item(const Item& other);
    Item& operator=(const Item& other);

    // The hash of the metadata name, as produced by HashMetricName().
    uint64_t name_hash;
    // The key, if specified when setting the item.
    absl::optional<int64_t> key;
    // The thread_id is captured from the current thread for a thread-scoped
    // item.
    absl::optional<PlatformThreadId> thread_id;
    // The value of the metadata item.
    int64_t value;
  };
  static constexpr size_t MAX_METADATA_COUNT = 50;
  typedef std::array<Item, MAX_METADATA_COUNT> ItemArray;

  // Sets a value for a (|name_hash|, |key|, |thread_id|) tuple, overwriting
  // any value previously set for the tuple. Nullopt keys are treated as just
  // another key state for the purpose of associating values.
  void Set(uint64_t name_hash,
           absl::optional<int64_t> key,
           absl::optional<PlatformThreadId> thread_id,
           int64_t value);

  // Removes the item with the specified name hash and optional key and
  // thread id. Has no effect if such an item does not exist.
  void Remove(uint64_t name_hash,
              absl::optional<int64_t> key,
              absl::optional<PlatformThreadId> thread_id);
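
  // Example usage (illustrative; assumes the name hash comes from
  // HashMetricName() in base/metrics/metrics_hashes.h, per the Item comment
  // above):
  //
  //   const uint64_t hash = base::HashMetricName("MyMetadata");
  //   // Process-scoped item with no key.
  //   recorder.Set(hash, absl::nullopt, absl::nullopt, 1);
  //   // Keyed item scoped to the current thread.
  //   recorder.Set(hash, 42, base::PlatformThread::CurrentId(), 2);
  //   ...
  //   recorder.Remove(hash, 42, base::PlatformThread::CurrentId());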

  // An object that provides access to a MetadataRecorder's items and holds
  // the necessary exclusive read lock until the object is destroyed.
  // Reclamation of inactive slots in the recorder can't occur while this
  // object lives, so it should be created as close as possible to the time
  // it's needed and released as soon as possible.
  //
  // This object should be created *before* suspending the target thread and
  // destroyed after resuming the target thread. Otherwise, the target thread
  // might be suspended while reclaiming inactive slots and holding the read
  // lock, which would cause the sampling thread to deadlock.
  //
  // Example usage:
  //
  //   MetadataRecorder r;
  //   base::MetadataRecorder::ItemArray arr;
  //   size_t item_count;
  //   ...
  //   {
  //     MetadataRecorder::MetadataProvider provider(&r, thread_id);
  //     item_count = provider.GetItems(&arr);
  //   }
  class SCOPED_LOCKABLE BASE_EXPORT MetadataProvider {
   public:
    // Acquires an exclusive read lock on the metadata recorder which is held
    // until the object is destroyed.
    explicit MetadataProvider(MetadataRecorder* metadata_recorder,
                              PlatformThreadId thread_id)
        EXCLUSIVE_LOCK_FUNCTION(metadata_recorder_->read_lock_);
    ~MetadataProvider() UNLOCK_FUNCTION();
    MetadataProvider(const MetadataProvider&) = delete;
    MetadataProvider& operator=(const MetadataProvider&) = delete;

    // Retrieves the metadata items active for the |thread_id| passed to the
    // constructor and copies them into |items|, returning the number of
    // items that were copied. Because |items| holds MAX_METADATA_COUNT
    // entries, all active items are guaranteed to fit. Requires
    // NO_THREAD_SAFETY_ANALYSIS because clang's analyzer doesn't understand
    // the cross-class locking used in this class's implementation.
    size_t GetItems(ItemArray* const items) const NO_THREAD_SAFETY_ANALYSIS;

   private:
    const raw_ptr<const MetadataRecorder> metadata_recorder_;
    PlatformThreadId thread_id_;
    base::AutoLock auto_lock_;
  };

 private:
  // TODO(charliea): Support large quantities of metadata efficiently.
  struct ItemInternal {
    ItemInternal();
    ~ItemInternal();

    // Indicates whether the metadata item is still active (i.e. not
    // removed).
    //
    // Requires atomic reads and writes to avoid word tearing when reading
    // and writing unsynchronized. Requires acquire/release semantics to
    // ensure that the other state in this struct is visible to the reading
    // thread before the item is marked as active.
    std::atomic<bool> is_active{false};

    // |name_hash|, |key|, and |thread_id| don't require atomicity or memory
    // order constraints because no reader will attempt to read them
    // mid-write. Specifically, readers wait until |is_active| is true to
    // read them. Because |is_active| is always stored with release
    // semantics, we're guaranteed that |name_hash|, |key|, and |thread_id|
    // are fully written before |is_active| is set to true.
    uint64_t name_hash;
    absl::optional<int64_t> key;
    absl::optional<PlatformThreadId> thread_id;

    // Requires atomic reads and writes to avoid word tearing when updating
    // an existing item unsynchronized. Does not require acquire/release
    // semantics because we rely on the |is_active| acquire/release semantics
    // to ensure that an item is fully created before we attempt to read it.
    std::atomic<int64_t> value;
  };
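
  // To illustrate the ordering contract above (a sketch of the intended
  // protocol, not the actual implementation): a writer publishing a new item
  // effectively does
  //
  //   item.name_hash = name_hash;  // Plain writes; no reader touches these
  //   item.key = key;              // until |is_active| becomes true.
  //   item.thread_id = thread_id;
  //   item.value.store(value, std::memory_order_relaxed);
  //   item.is_active.store(true, std::memory_order_release);
  //
  // and a reader does
  //
  //   if (item.is_active.load(std::memory_order_acquire)) {
  //     // |name_hash|, |key|, and |thread_id| are now safe to read, and
  //     // |value| can be loaded with memory_order_relaxed.
  //   }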

  // Attempts to free slots in the metadata map that are currently allocated
  // to inactive items. May fail silently if the read lock is already held,
  // in which case no slots will be freed. Returns the number of item slots
  // used after the reclamation.
  size_t TryReclaimInactiveSlots(size_t item_slots_used)
      EXCLUSIVE_LOCKS_REQUIRED(write_lock_) LOCKS_EXCLUDED(read_lock_);
  // Updates |item_slots_used_| to reflect the new item count and returns the
  // number of item slots used after the reclamation.
  size_t ReclaimInactiveSlots(size_t item_slots_used)
      EXCLUSIVE_LOCKS_REQUIRED(write_lock_)
          EXCLUSIVE_LOCKS_REQUIRED(read_lock_);

  // Retrieves the items in the metadata recorder that are active for
  // |thread_id| and copies them into |items|, returning the number of
  // metadata items that were copied.
  size_t GetItems(ItemArray* const items, PlatformThreadId thread_id) const
      EXCLUSIVE_LOCKS_REQUIRED(read_lock_);

  // Metadata items that the recorder has seen. Rather than implementing the
  // metadata recorder as a dense array, we implement it as a sparse array
  // where removed metadata items keep their slot with their |is_active| bit
  // set to false. This avoids race conditions caused by reusing slots that
  // might otherwise cause mismatches between metadata name hashes and
  // values.
  //
  // For the rationale behind this design (along with others considered), see
  // https://docs.google.com/document/d/18shLhVwuFbLl_jKZxCmOfRB98FmNHdKl0yZZZ3aEO4U/edit#.
  std::array<ItemInternal, MAX_METADATA_COUNT> items_;

  // The number of item slots used in the metadata map.
  //
  // Requires atomic reads and writes to avoid word tearing when reading and
  // writing unsynchronized. Requires acquire/release semantics to ensure
  // that a newly-allocated slot is fully initialized before the reader
  // becomes aware of its existence.
  std::atomic<size_t> item_slots_used_{0};

  // The number of item slots occupied by inactive items.
  size_t inactive_item_count_ GUARDED_BY(write_lock_) = 0;

  // A lock that guards against multiple threads trying to manipulate
  // |items_|, |item_slots_used_|, or |inactive_item_count_| at the same
  // time.
  base::Lock write_lock_;

  // A lock that guards against a reader trying to read |items_| while
  // inactive slots are being reclaimed.
  base::Lock read_lock_;
};

}  // namespace base

#endif  // BASE_PROFILER_METADATA_RECORDER_H_