1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
17 #define TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
18
19 // clang-format off
20 // Required for IS_MOBILE_PLATFORM
21 #include "tensorflow/core/platform/platform.h"
22 // clang-format on
23
24 // We replace this implementation with a null implementation for mobile
25 // platforms.
26 #ifdef IS_MOBILE_PLATFORM
27 #define TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H // prevent accidental use
28 // of
29 // mobile_percentile_sampler.h
30 #include "tensorflow/core/lib/monitoring/mobile_percentile_sampler.h"
31 #undef TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H
32 #else
33
34 #include <cmath>
35 #include <map>
36
37 #include "tensorflow/core/lib/core/status.h"
38 #include "tensorflow/core/lib/monitoring/collection_registry.h"
39 #include "tensorflow/core/lib/monitoring/metric_def.h"
40 #include "tensorflow/core/lib/monitoring/types.h"
41 #include "tensorflow/core/platform/macros.h"
42 #include "tensorflow/core/platform/mutex.h"
43 #include "tensorflow/core/platform/thread_annotations.h"
44
45 namespace tensorflow {
46 namespace monitoring {
47
48 // PercentileSamplerCell stores each value of an PercentileSampler.
49 // The class uses a circular buffer to maintain a window of samples.
50 //
51 // This class is thread-safe.
52 class PercentileSamplerCell {
53 public:
PercentileSamplerCell(UnitOfMeasure unit_of_measure,std::vector<double> percentiles,size_t max_samples)54 PercentileSamplerCell(UnitOfMeasure unit_of_measure,
55 std::vector<double> percentiles, size_t max_samples)
56 : unit_of_measure_(unit_of_measure),
57 percentiles_(std::move(percentiles)),
58 samples_(max_samples),
59 num_samples_(0),
60 next_position_(0),
61 total_samples_(0),
62 accumulator_(0.0) {}
63
64 // Atomically adds a sample.
65 void Add(double sample);
66
67 Percentiles value() const;
68
69 private:
70 struct Sample {
71 bool operator<(const Sample& rhs) const { return value < rhs.value; }
72
73 uint64 nstime = 0;
74 double value = NAN;
75 };
76
77 std::vector<Sample> GetSamples(size_t* total_samples,
78 long double* accumulator) const;
79
80 mutable mutex mu_;
81 UnitOfMeasure unit_of_measure_;
82 const std::vector<double> percentiles_;
83 std::vector<Sample> samples_ TF_GUARDED_BY(mu_);
84 size_t num_samples_ TF_GUARDED_BY(mu_);
85 size_t next_position_ TF_GUARDED_BY(mu_);
86 size_t total_samples_ TF_GUARDED_BY(mu_);
87 long double accumulator_ TF_GUARDED_BY(mu_);
88
89 TF_DISALLOW_COPY_AND_ASSIGN(PercentileSamplerCell);
90 };
91
92 // A stateful class for updating a cumulative percentile sampled metric.
93 //
94 // This class stores, in each cell, up to max_samples values in a circular
95 // buffer, and returns the percentiles information as cell value.
96 //
97 // PercentileSampler allocates storage and maintains a cell for each value. You
98 // can retrieve an individual cell using a label-tuple and update it separately.
99 // This improves performance since operations related to retrieval, like
100 // map-indexing and locking, are avoided.
101 //
102 // This class is thread-safe.
103 template <int NumLabels>
104 class PercentileSampler {
105 public:
~PercentileSampler()106 ~PercentileSampler() {
107 // Deleted here, before the metric_def is destroyed.
108 registration_handle_.reset();
109 }
110
111 // Creates the metric based on the metric-definition arguments and buckets.
112 //
113 // Example;
114 // auto* sampler_with_label =
115 // PercentileSampler<1>::New({"/tensorflow/sampler",
116 // "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024,
117 // UnitOfMeasure::kTime);
118 static PercentileSampler* New(
119 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
120 metric_def,
121 std::vector<double> percentiles, size_t max_samples,
122 UnitOfMeasure unit_of_measure);
123
124 // Retrieves the cell for the specified labels, creating it on demand if
125 // not already present.
126 template <typename... Labels>
127 PercentileSamplerCell* GetCell(const Labels&... labels)
128 TF_LOCKS_EXCLUDED(mu_);
129
GetStatus()130 Status GetStatus() { return status_; }
131
132 private:
133 friend class PercentileSamplerCell;
134
PercentileSampler(const MetricDef<MetricKind::kCumulative,Percentiles,NumLabels> & metric_def,std::vector<double> percentiles,size_t max_samples,UnitOfMeasure unit_of_measure)135 PercentileSampler(const MetricDef<MetricKind::kCumulative, Percentiles,
136 NumLabels>& metric_def,
137 std::vector<double> percentiles, size_t max_samples,
138 UnitOfMeasure unit_of_measure)
139 : metric_def_(metric_def),
140 unit_of_measure_(unit_of_measure),
141 percentiles_(std::move(percentiles)),
142 max_samples_(max_samples),
143 registration_handle_(CollectionRegistry::Default()->Register(
144 &metric_def_, [&](MetricCollectorGetter getter) {
145 auto metric_collector = getter.Get(&metric_def_);
146 mutex_lock l(mu_);
147 for (const auto& cell : cells_) {
148 metric_collector.CollectValue(cell.first, cell.second.value());
149 }
150 })) {
151 if (registration_handle_) {
152 for (size_t i = 0; i < percentiles_.size(); ++i) {
153 if (percentiles_[i] < 0.0 || percentiles_[i] > 100.0) {
154 status_ = Status(tensorflow::error::Code::INVALID_ARGUMENT,
155 "Percentile values must be in [0, 100] range.");
156 break;
157 }
158 if (i + 1 < percentiles_.size() &&
159 percentiles_[i] >= percentiles_[i + 1]) {
160 status_ =
161 Status(tensorflow::error::Code::INVALID_ARGUMENT,
162 "Percentile values must be in strictly ascending order.");
163 break;
164 }
165 }
166 } else {
167 status_ = Status(tensorflow::error::Code::ALREADY_EXISTS,
168 "Another metric with the same name already exists.");
169 }
170 }
171
172 mutable mutex mu_;
173
174 Status status_;
175
176 // The metric definition. This will be used to identify the metric when we
177 // register it for collection.
178 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels> metric_def_;
179
180 UnitOfMeasure unit_of_measure_ = UnitOfMeasure::kNumber;
181
182 // The percentiles samples required for this metric.
183 const std::vector<double> percentiles_;
184
185 // The maximum size of the samples colected by the PercentileSamplerCell cell.
186 const size_t max_samples_ = 0;
187
188 // Registration handle with the CollectionRegistry.
189 std::unique_ptr<CollectionRegistry::RegistrationHandle> registration_handle_;
190
191 using LabelArray = std::array<string, NumLabels>;
192 // we need a container here that guarantees pointer stability of the value,
193 // namely, the pointer of the value should remain valid even after more cells
194 // are inserted.
195 std::map<LabelArray, PercentileSamplerCell> cells_ TF_GUARDED_BY(mu_);
196
197 TF_DISALLOW_COPY_AND_ASSIGN(PercentileSampler);
198 };
199
200 template <int NumLabels>
New(const MetricDef<MetricKind::kCumulative,Percentiles,NumLabels> & metric_def,std::vector<double> percentiles,size_t max_samples,UnitOfMeasure unit_of_measure)201 PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
202 const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
203 metric_def,
204 std::vector<double> percentiles, size_t max_samples,
205 UnitOfMeasure unit_of_measure) {
206 return new PercentileSampler<NumLabels>(metric_def, std::move(percentiles),
207 max_samples, unit_of_measure);
208 }
209
210 template <int NumLabels>
211 template <typename... Labels>
GetCell(const Labels &...labels)212 PercentileSamplerCell* PercentileSampler<NumLabels>::GetCell(
213 const Labels&... labels) TF_LOCKS_EXCLUDED(mu_) {
214 // Provides a more informative error message than the one during array
215 // construction below.
216 static_assert(
217 sizeof...(Labels) == NumLabels,
218 "Mismatch between PercentileSampler<NumLabels> and number of labels "
219 "provided in GetCell(...).");
220
221 const LabelArray& label_array = {{labels...}};
222 mutex_lock l(mu_);
223 const auto found_it = cells_.find(label_array);
224 if (found_it != cells_.end()) {
225 return &(found_it->second);
226 }
227 return &(cells_
228 .emplace(std::piecewise_construct,
229 std::forward_as_tuple(label_array),
230 std::forward_as_tuple(unit_of_measure_, percentiles_,
231 max_samples_))
232 .first->second);
233 }
234
235 } // namespace monitoring
236 } // namespace tensorflow
237
238 #endif // IS_MOBILE_PLATFORM
239 #endif // TENSORFLOW_CORE_LIB_MONITORING_PERCENTILE_SAMPLER_H_
240