1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
17 #define TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
18
19 // clang-format off
20 // Required for IS_MOBILE_PLATFORM
21 #include "tensorflow/core/platform/platform.h"
22 // clang-format on
23
24 // We replace this implementation with a null implementation for mobile
25 // platforms.
26 #ifdef IS_MOBILE_PLATFORM
27
28 #include "tensorflow/core/lib/core/status.h"
29 #include "tensorflow/core/lib/monitoring/collection_registry.h"
30 #include "tensorflow/core/lib/monitoring/metric_def.h"
31 #include "tensorflow/core/lib/monitoring/types.h"
32 #include "tensorflow/core/platform/macros.h"
33
34 namespace tensorflow {
35 namespace monitoring {
36
37 class PercentileSamplerCell {
38 public:
Add(double sample)39 void Add(double sample) {}
40
value()41 Percentiles value() const { return Percentiles(); }
42 };
43
44 template <int NumLabels>
45 class PercentileSampler {
46 public:
47 static PercentileSampler* New(
48 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
49 metric_def,
50 std::vector<double> percentiles, size_t max_samples,
51 UnitOfMeasure unit_of_measure);
52
53 template <typename... Labels>
GetCell(const Labels &...labels)54 PercentileSamplerCell* GetCell(const Labels&... labels) {
55 return &default_cell_;
56 }
57
GetStatus()58 Status GetStatus() { return Status::OK(); }
59
60 private:
61 PercentileSamplerCell default_cell_;
62
63 PercentileSampler() = default;
64
65 TF_DISALLOW_COPY_AND_ASSIGN(PercentileSampler);
66 };
67
68 template <int NumLabels>
New(const MetricDef<MetricKind::kCumulative,Percentiles,NumLabels> &,std::vector<double>,size_t,UnitOfMeasure)69 PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
70 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
71 /* metric_def */,
72 std::vector<double> /* percentiles */, size_t /* max_samples */,
73 UnitOfMeasure /* unit_of_measure */) {
74 return new PercentileSampler<NumLabels>();
75 }
76
77 } // namespace monitoring
78 } // namespace tensorflow
79
80 #else // IS_MOBILE_PLATFORM
81
82 #include <cmath>
83 #include <map>
84
85 #include "tensorflow/core/lib/core/status.h"
86 #include "tensorflow/core/lib/monitoring/collection_registry.h"
87 #include "tensorflow/core/lib/monitoring/metric_def.h"
88 #include "tensorflow/core/lib/monitoring/types.h"
89 #include "tensorflow/core/platform/macros.h"
90 #include "tensorflow/core/platform/mutex.h"
91 #include "tensorflow/core/platform/thread_annotations.h"
92
93 namespace tensorflow {
94 namespace monitoring {
95
96 // PercentileSamplerCell stores each value of an PercentileSampler.
97 // The class uses a circular buffer to maintain a window of samples.
98 //
99 // This class is thread-safe.
100 class PercentileSamplerCell {
101 public:
PercentileSamplerCell(UnitOfMeasure unit_of_measure,std::vector<double> percentiles,size_t max_samples)102 PercentileSamplerCell(UnitOfMeasure unit_of_measure,
103 std::vector<double> percentiles, size_t max_samples)
104 : unit_of_measure_(unit_of_measure),
105 percentiles_(std::move(percentiles)),
106 samples_(max_samples),
107 num_samples_(0),
108 next_position_(0),
109 total_samples_(0),
110 accumulator_(0.0) {}
111
112 // Atomically adds a sample.
113 void Add(double sample);
114
115 Percentiles value() const;
116
117 private:
118 struct Sample {
119 bool operator<(const Sample& rhs) const { return value < rhs.value; }
120
121 uint64 nstime = 0;
122 double value = NAN;
123 };
124
125 std::vector<Sample> GetSamples(size_t* total_samples,
126 long double* accumulator) const;
127
128 mutable mutex mu_;
129 UnitOfMeasure unit_of_measure_;
130 const std::vector<double> percentiles_;
131 std::vector<Sample> samples_ TF_GUARDED_BY(mu_);
132 size_t num_samples_ TF_GUARDED_BY(mu_);
133 size_t next_position_ TF_GUARDED_BY(mu_);
134 size_t total_samples_ TF_GUARDED_BY(mu_);
135 long double accumulator_ TF_GUARDED_BY(mu_);
136
137 TF_DISALLOW_COPY_AND_ASSIGN(PercentileSamplerCell);
138 };
139
140 // A stateful class for updating a cumulative percentile sampled metric.
141 //
142 // This class stores, in each cell, up to max_samples values in a circular
143 // buffer, and returns the percentiles information as cell value.
144 //
145 // PercentileSampler allocates storage and maintains a cell for each value. You
146 // can retrieve an individual cell using a label-tuple and update it separately.
147 // This improves performance since operations related to retrieval, like
148 // map-indexing and locking, are avoided.
149 //
150 // This class is thread-safe.
151 template <int NumLabels>
152 class PercentileSampler {
153 public:
~PercentileSampler()154 ~PercentileSampler() {
155 // Deleted here, before the metric_def is destroyed.
156 registration_handle_.reset();
157 }
158
159 // Creates the metric based on the metric-definition arguments and buckets.
160 //
161 // Example;
162 // auto* sampler_with_label =
163 // PercentileSampler<1>::New({"/tensorflow/sampler",
164 // "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024,
165 // UnitOfMeasure::kTime);
166 static PercentileSampler* New(
167 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
168 metric_def,
169 std::vector<double> percentiles, size_t max_samples,
170 UnitOfMeasure unit_of_measure);
171
172 // Retrieves the cell for the specified labels, creating it on demand if
173 // not already present.
174 template <typename... Labels>
175 PercentileSamplerCell* GetCell(const Labels&... labels)
176 TF_LOCKS_EXCLUDED(mu_);
177
GetStatus()178 Status GetStatus() { return status_; }
179
180 private:
181 friend class PercentileSamplerCell;
182
PercentileSampler(const MetricDef<MetricKind::kCumulative,Percentiles,NumLabels> & metric_def,std::vector<double> percentiles,size_t max_samples,UnitOfMeasure unit_of_measure)183 PercentileSampler(const MetricDef<MetricKind::kCumulative, Percentiles,
184 NumLabels>& metric_def,
185 std::vector<double> percentiles, size_t max_samples,
186 UnitOfMeasure unit_of_measure)
187 : metric_def_(metric_def),
188 unit_of_measure_(unit_of_measure),
189 percentiles_(std::move(percentiles)),
190 max_samples_(max_samples),
191 registration_handle_(CollectionRegistry::Default()->Register(
192 &metric_def_, [&](MetricCollectorGetter getter) {
193 auto metric_collector = getter.Get(&metric_def_);
194 mutex_lock l(mu_);
195 for (const auto& cell : cells_) {
196 metric_collector.CollectValue(cell.first, cell.second.value());
197 }
198 })) {
199 if (registration_handle_) {
200 for (size_t i = 0; i < percentiles_.size(); ++i) {
201 if (percentiles_[i] < 0.0 || percentiles_[i] > 100.0) {
202 status_ = Status(tensorflow::error::Code::INVALID_ARGUMENT,
203 "Percentile values must be in [0, 100] range.");
204 break;
205 }
206 if (i + 1 < percentiles_.size() &&
207 percentiles_[i] >= percentiles_[i + 1]) {
208 status_ =
209 Status(tensorflow::error::Code::INVALID_ARGUMENT,
210 "Percentile values must be in strictly ascending order.");
211 break;
212 }
213 }
214 } else {
215 status_ = Status(tensorflow::error::Code::ALREADY_EXISTS,
216 "Another metric with the same name already exists.");
217 }
218 }
219
220 mutable mutex mu_;
221
222 Status status_;
223
224 // The metric definition. This will be used to identify the metric when we
225 // register it for collection.
226 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels> metric_def_;
227
228 UnitOfMeasure unit_of_measure_ = UnitOfMeasure::kNumber;
229
230 // The percentiles samples required for this metric.
231 const std::vector<double> percentiles_;
232
233 // The maximum size of the samples colected by the PercentileSamplerCell cell.
234 const size_t max_samples_ = 0;
235
236 // Registration handle with the CollectionRegistry.
237 std::unique_ptr<CollectionRegistry::RegistrationHandle> registration_handle_;
238
239 using LabelArray = std::array<string, NumLabels>;
240 // we need a container here that guarantees pointer stability of the value,
241 // namely, the pointer of the value should remain valid even after more cells
242 // are inserted.
243 std::map<LabelArray, PercentileSamplerCell> cells_ TF_GUARDED_BY(mu_);
244
245 TF_DISALLOW_COPY_AND_ASSIGN(PercentileSampler);
246 };
247
248 template <int NumLabels>
New(const MetricDef<MetricKind::kCumulative,Percentiles,NumLabels> & metric_def,std::vector<double> percentiles,size_t max_samples,UnitOfMeasure unit_of_measure)249 PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
250 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
251 metric_def,
252 std::vector<double> percentiles, size_t max_samples,
253 UnitOfMeasure unit_of_measure) {
254 return new PercentileSampler<NumLabels>(metric_def, std::move(percentiles),
255 max_samples, unit_of_measure);
256 }
257
258 template <int NumLabels>
259 template <typename... Labels>
GetCell(const Labels &...labels)260 PercentileSamplerCell* PercentileSampler<NumLabels>::GetCell(
261 const Labels&... labels) TF_LOCKS_EXCLUDED(mu_) {
262 // Provides a more informative error message than the one during array
263 // construction below.
264 static_assert(
265 sizeof...(Labels) == NumLabels,
266 "Mismatch between PercentileSampler<NumLabels> and number of labels "
267 "provided in GetCell(...).");
268
269 const LabelArray& label_array = {{labels...}};
270 mutex_lock l(mu_);
271 const auto found_it = cells_.find(label_array);
272 if (found_it != cells_.end()) {
273 return &(found_it->second);
274 }
275 return &(cells_
276 .emplace(std::piecewise_construct,
277 std::forward_as_tuple(label_array),
278 std::forward_as_tuple(unit_of_measure_, percentiles_,
279 max_samples_))
280 .first->second);
281 }
282
283 } // namespace monitoring
284 } // namespace tensorflow
285
286 #endif // IS_MOBILE_PLATFORM
287 #endif // TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
288