1 // Copyright 2023 The Pigweed Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 // use this file except in compliance with the License. You may obtain a copy of 5 // the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 // License for the specific language governing permissions and limitations under 13 // the License. 14 15 #pragma once 16 #include <pw_assert/check.h> 17 #include <pw_chrono/system_clock.h> 18 19 #include <algorithm> 20 #include <array> 21 #include <functional> 22 #include <optional> 23 #include <utility> 24 #include <vector> 25 26 namespace bt::internal { 27 28 // This class is not thread-safe. 29 // TODO(fxbug.dev/42150683): Store each retirement's timestamp in order to 30 // provide information like how much time the log depth represents and overall 31 // throughput (bytes/sec and packets/sec) 32 class RetireLog final { 33 public: 34 // Create a bounded buffer intended to hold recently retired PipelineMonitor 35 // tokens. It supports efficient querying of statistics about logged events. 36 // |min_depth| specifies the number of entries that must be logged before 37 // queries return meaningful results and must be non-zero. |max_depth| limits 38 // the number of recent entries that are kept in memory. Each entry is 39 // represented by Retired, so the log memory use is roughly sizeof(Retired) * 40 // max_depth. The designed range is between 100 and 100,000 entries deep. 41 explicit RetireLog(size_t min_depth, size_t max_depth); 42 43 ~RetireLog() = default; 44 45 // Add an entry to the log. If depth() is less than max_depth, the log is 46 // expanded. Otherwise, the oldest entry is replaced. This is a fast operation 47 // that does not allocate. 48 void Retire(size_t byte_count, pw::chrono::SystemClock::duration age); 49 50 // The current number of log entries. depth()51 [[nodiscard]] size_t depth() const { return buffer_.size(); } 52 53 // Compute the quantiles at cut points specified in |partitions| as numbers 54 // between 0 and 1. Each partition specifies a point in the distribution of 55 // |bytes_count|s in the log. Returns an array of |byte_count| entries 56 // corresponding to those points, if |depth()| is at least min_depth as 57 // provided to the ctor. Otherwise, returns std::nullopt. 58 // 59 // Cut points may, but do not need to, be evenly distributed, e.g. {.25, .5, 60 // .75} for quartiles. If a cut point is "between" log entry values, the 61 // nearest value is chosen without interpolation (e.g. for 0.5 with an even 62 // log depth, a biased median is returned rather than the average of the true 63 // median samples). 64 // 65 // TODO(fxbug.dev/42150683): Add a |max_age| parameter to window to only 66 // samples that are recent enough to be relevant 67 template <size_t NumQuantiles> 68 [[nodiscard]] std::optional<std::array<size_t, NumQuantiles>> ComputeByteCountQuantiles(std::array<double,NumQuantiles> partitions)69 ComputeByteCountQuantiles(std::array<double, NumQuantiles> partitions) const { 70 return ComputeQuantiles(partitions, &Retired::byte_count); 71 } 72 73 // Similar to ComputeByteCountQuantiles, but for the |age| durations logged in 74 // |Retire|. 75 template <size_t NumQuantiles> 76 [[nodiscard]] std::optional< 77 std::array<pw::chrono::SystemClock::duration, NumQuantiles>> ComputeAgeQuantiles(std::array<double,NumQuantiles> partitions)78 ComputeAgeQuantiles(std::array<double, NumQuantiles> partitions) const { 79 return ComputeQuantiles(partitions, &Retired::age); 80 } 81 82 private: 83 struct Retired { 84 size_t byte_count; 85 pw::chrono::SystemClock::duration age; 86 }; 87 88 // Helper function to build the index_sequence 89 template <typename ArrayT, typename PointerT> ComputeQuantiles(ArrayT partitions,PointerT element_ptr)90 [[nodiscard]] auto ComputeQuantiles(ArrayT partitions, 91 PointerT element_ptr) const { 92 return ComputeQuantilesImpl( 93 partitions, element_ptr, std::make_index_sequence<partitions.size()>()); 94 } 95 96 // Computes the indexes to sample the retire log as if it were sorted, then 97 // returns those samples in the same order as specified. |element_ptr| 98 // specifies which field in Retired to sort by. The log isn't actually fully 99 // sorted; instead, a k-selection algorithm is used for each sample. Assuming 100 // partitions.size() << depth(), this runs on average linearly to their 101 // product. 102 template <size_t NumQuantiles, typename ElementT, size_t... Indexes> 103 [[nodiscard]] std::optional<std::array<ElementT, NumQuantiles>> ComputeQuantilesImpl(std::array<double,NumQuantiles> partitions,ElementT Retired::* element_ptr,std::index_sequence<Indexes...>)104 ComputeQuantilesImpl(std::array<double, NumQuantiles> partitions, 105 ElementT Retired::* element_ptr, 106 std::index_sequence<Indexes...> /*unused*/) const { 107 if (depth() < min_depth_) { 108 return std::nullopt; 109 } 110 111 // Computing quantiles is done in-place with k-selection routines, so use a 112 // working copy that is reused across invocations to prevent re-allocation 113 // with each call 114 std::vector<ElementT>& elements = 115 std::get<std::vector<ElementT>>(quantile_scratchpads_); 116 elements.resize(depth()); 117 std::transform(buffer_.begin(), 118 buffer_.end(), 119 elements.begin(), 120 std::mem_fn(element_ptr)); 121 122 // The k-selection we use is std::nth_element, which conveniently does a 123 // partial sort about k. By pre-sorting values of k, each invocation of 124 // nth_element selects only from the elements that are greater than or equal 125 // to the previous selection. The values are sorted with their corresponding 126 // indexes to remember the original order for the output 127 std::array partitions_and_indexes = { 128 std::pair{partitions[Indexes], Indexes}...}; 129 std::sort(partitions_and_indexes.begin(), partitions_and_indexes.end()); 130 131 std::array<ElementT, NumQuantiles> quantiles; // output 132 auto unsorted_begin = elements.begin(); 133 for (auto [partition, index] : partitions_and_indexes) { 134 PW_CHECK(partition >= 0.); 135 PW_CHECK(partition <= 1.); 136 // Even though the last element is at index depth()-1, use depth() here 137 // instead to ensure the final (max) element has sufficient range 138 // representation. 139 const size_t cut_point = 140 static_cast<size_t>(static_cast<double>(depth()) * partition); 141 PW_CHECK(cut_point <= depth()); 142 143 // In the case that partition = 1.0, cut_point = depth(). Saturate to the 144 // final (max) element. 145 const size_t selection_offset = std::min(cut_point, depth() - 1); 146 const auto cut_iter = std::next(elements.begin(), selection_offset); 147 std::nth_element(unsorted_begin, cut_iter, elements.end()); 148 // Post-condition: the element at cut_iter is what it would be if 149 // |elements| were sorted, with all preceding elements no greater than 150 // *cut_iter and all successive elements no less than *cut_iter. 151 quantiles[index] = *cut_iter; 152 153 // Technically the next unsorted element is at cut_iter+1 but moving 154 // unsorted_begin past cut_iter causes problems if multiple values of 155 // |partition| are the same. 156 unsorted_begin = cut_iter; 157 } 158 return quantiles; 159 } 160 161 const size_t min_depth_; 162 const size_t max_depth_; 163 164 // Circular buffer of recently retired entries, kept in increasing retirement 165 // time order starting at the oldest at |next_insertion_index_|. Bounded to 166 // |max_depth_| in size. 167 std::vector<Retired> buffer_; 168 size_t next_insertion_index_ = 0; 169 170 // Used by ComputeQuantilesImpl to store type-dependent k-selection 171 // computation working data. We could probably save memory by using the same 172 // scratchpad for both byte_count and age, but it's not worth the extra code 173 // complexity at this time. 174 mutable std::tuple<std::vector<size_t>, 175 std::vector<pw::chrono::SystemClock::duration>> 176 quantile_scratchpads_; 177 }; 178 179 } // namespace bt::internal 180