1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xrt/xrt_metrics.h"
17
18 #include "tensorflow/core/lib/monitoring/collection_registry.h"
19 #include "tensorflow/core/platform/regexp.h"
20
21 namespace tensorflow {
22 namespace {
23
// Sample-count argument handed to every monitoring::PercentileSampler<0>::New
// call in this file (presumably the cap on samples each sampler retains --
// confirm against the monitoring API).
constexpr size_t kMaxSamples = 1024;
25
// Returns the percentile list shared by all XRT percentile samplers defined in
// this file: 25, 50, 80, 90, 95 and 99.
std::vector<double> GetDefaultPercentiles() {
  const std::vector<double> default_percentiles = {25.0, 50.0, 80.0,
                                                   90.0, 95.0, 99.0};
  return default_percentiles;
}
29
IsSelectedMetric(const xrt::XRTMetricsCollect & metrics,const string & name)30 bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics,
31 const string& name) {
32 if (metrics.metrics_regex_size() == 0) {
33 return true;
34 }
35 for (auto& metric_regex : metrics.metrics_regex()) {
36 if (RE2::FullMatch(name, metric_regex)) {
37 return true;
38 }
39 }
40 return false;
41 }
42
SetUnitOfMeasure(xrt::MetricValues * metrics,monitoring::UnitOfMeasure unit_of_measure)43 void SetUnitOfMeasure(xrt::MetricValues* metrics,
44 monitoring::UnitOfMeasure unit_of_measure) {
45 switch (unit_of_measure) {
46 case monitoring::UnitOfMeasure::kNumber:
47 metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
48 break;
49 case monitoring::UnitOfMeasure::kTime:
50 metrics->set_unit_of_measure(xrt::MetricValues::TIME);
51 break;
52 case monitoring::UnitOfMeasure::kBytes:
53 metrics->set_unit_of_measure(xrt::MetricValues::BYTES);
54 break;
55 }
56 }
57
AddMetrics(xrt::MetricsReport * report,const monitoring::PointSet & point_set)58 Status AddMetrics(xrt::MetricsReport* report,
59 const monitoring::PointSet& point_set) {
60 for (auto& point : point_set.points) {
61 xrt::MetricValues* metrics = report->add_metrics();
62 metrics->set_name(point_set.metric_name);
63 if (point->value_type == monitoring::ValueType::kPercentiles) {
64 xrt::Percentiles* percentiles = metrics->mutable_percentiles_value();
65 SetUnitOfMeasure(metrics, point->percentiles_value.unit_of_measure);
66 percentiles->set_start_nstime(point->percentiles_value.start_nstime);
67 percentiles->set_end_nstime(point->percentiles_value.end_nstime);
68 percentiles->set_min_value(point->percentiles_value.min_value);
69 percentiles->set_max_value(point->percentiles_value.max_value);
70 percentiles->set_mean(point->percentiles_value.mean);
71 percentiles->set_stddev(point->percentiles_value.stddev);
72 percentiles->set_num_samples(point->percentiles_value.num_samples);
73 percentiles->set_total_samples(point->percentiles_value.total_samples);
74 percentiles->set_accumulator(point->percentiles_value.accumulator);
75 for (auto& pct_point : point->percentiles_value.points) {
76 xrt::Percentiles::Point* xpoint = percentiles->add_points();
77 xpoint->set_percentile(pct_point.percentile);
78 xpoint->set_value(pct_point.value);
79 }
80 } else if (point->value_type == monitoring::ValueType::kInt64) {
81 metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
82 metrics->set_int64_value(point->int64_value);
83 }
84 }
85 return Status::OK();
86 }
87
88 } // namespace
89
90 namespace xrt_metrics {
91
GetAllocateCell()92 monitoring::PercentileSamplerCell* GetAllocateCell() {
93 static monitoring::PercentileSamplerCell* cell =
94 monitoring::PercentileSampler<0>::New(
95 {"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"},
96 GetDefaultPercentiles(), kMaxSamples,
97 monitoring::UnitOfMeasure::kTime)
98 ->GetCell();
99 return cell;
100 }
101
GetAllocateUninitializedCell()102 monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() {
103 static monitoring::PercentileSamplerCell* cell =
104 monitoring::PercentileSampler<0>::New(
105 {"/tensorflow/xrt/ops/allocate_uninitialized",
106 "Tracks XRTAllocateUninitialized times"},
107 GetDefaultPercentiles(), kMaxSamples,
108 monitoring::UnitOfMeasure::kTime)
109 ->GetCell();
110 return cell;
111 }
112
GetAllocateFromTensorCell()113 monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() {
114 static monitoring::PercentileSamplerCell* cell =
115 monitoring::PercentileSampler<0>::New(
116 {"/tensorflow/xrt/ops/allocate_from_tensor",
117 "Tracks XRTAllocateFromTensor times"},
118 GetDefaultPercentiles(), kMaxSamples,
119 monitoring::UnitOfMeasure::kTime)
120 ->GetCell();
121 return cell;
122 }
123
GetSubTupleCell()124 monitoring::PercentileSamplerCell* GetSubTupleCell() {
125 static monitoring::PercentileSamplerCell* cell =
126 monitoring::PercentileSampler<0>::New(
127 {"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"},
128 GetDefaultPercentiles(), kMaxSamples,
129 monitoring::UnitOfMeasure::kTime)
130 ->GetCell();
131 return cell;
132 }
133
GetMakeTupleCell()134 monitoring::PercentileSamplerCell* GetMakeTupleCell() {
135 static monitoring::PercentileSamplerCell* cell =
136 monitoring::PercentileSampler<0>::New(
137 {"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"},
138 GetDefaultPercentiles(), kMaxSamples,
139 monitoring::UnitOfMeasure::kTime)
140 ->GetCell();
141 return cell;
142 }
143
GetReadLiteralCell()144 monitoring::PercentileSamplerCell* GetReadLiteralCell() {
145 static monitoring::PercentileSamplerCell* cell =
146 monitoring::PercentileSampler<0>::New(
147 {"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"},
148 GetDefaultPercentiles(), kMaxSamples,
149 monitoring::UnitOfMeasure::kTime)
150 ->GetCell();
151 return cell;
152 }
153
GetReadToTensorCell()154 monitoring::PercentileSamplerCell* GetReadToTensorCell() {
155 static monitoring::PercentileSamplerCell* cell =
156 monitoring::PercentileSampler<0>::New(
157 {"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"},
158 GetDefaultPercentiles(), kMaxSamples,
159 monitoring::UnitOfMeasure::kTime)
160 ->GetCell();
161 return cell;
162 }
163
GetWriteLiteralCell()164 monitoring::PercentileSamplerCell* GetWriteLiteralCell() {
165 static monitoring::PercentileSamplerCell* cell =
166 monitoring::PercentileSampler<0>::New(
167 {"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"},
168 GetDefaultPercentiles(), kMaxSamples,
169 monitoring::UnitOfMeasure::kTime)
170 ->GetCell();
171 return cell;
172 }
173
GetReleaseAllocationCell()174 monitoring::PercentileSamplerCell* GetReleaseAllocationCell() {
175 static monitoring::PercentileSamplerCell* cell =
176 monitoring::PercentileSampler<0>::New(
177 {"/tensorflow/xrt/ops/release_allocation",
178 "Tracks XRTReleaseAllocation times"},
179 GetDefaultPercentiles(), kMaxSamples,
180 monitoring::UnitOfMeasure::kTime)
181 ->GetCell();
182 return cell;
183 }
184
GetReleaseAllAllocationsCell()185 monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() {
186 static monitoring::PercentileSamplerCell* cell =
187 monitoring::PercentileSampler<0>::New(
188 {"/tensorflow/xrt/ops/release_all_allocations",
189 "Tracks XRTReleaseAllAllocations times"},
190 GetDefaultPercentiles(), kMaxSamples,
191 monitoring::UnitOfMeasure::kTime)
192 ->GetCell();
193 return cell;
194 }
195
GetCompactAllocationsCell()196 monitoring::PercentileSamplerCell* GetCompactAllocationsCell() {
197 static monitoring::PercentileSamplerCell* cell =
198 monitoring::PercentileSampler<0>::New(
199 {"/tensorflow/xrt/ops/compact_allocations",
200 "Tracks XRTCompactAllocations times"},
201 GetDefaultPercentiles(), kMaxSamples,
202 monitoring::UnitOfMeasure::kTime)
203 ->GetCell();
204 return cell;
205 }
206
GetCompileCell()207 monitoring::PercentileSamplerCell* GetCompileCell() {
208 static monitoring::PercentileSamplerCell* cell =
209 monitoring::PercentileSampler<0>::New(
210 {"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"},
211 GetDefaultPercentiles(), kMaxSamples,
212 monitoring::UnitOfMeasure::kTime)
213 ->GetCell();
214 return cell;
215 }
216
GetReleaseCompilationCell()217 monitoring::PercentileSamplerCell* GetReleaseCompilationCell() {
218 static monitoring::PercentileSamplerCell* cell =
219 monitoring::PercentileSampler<0>::New(
220 {"/tensorflow/xrt/ops/release_compilation",
221 "Tracks XRTReleaseCompilationRef times"},
222 GetDefaultPercentiles(), kMaxSamples,
223 monitoring::UnitOfMeasure::kTime)
224 ->GetCell();
225 return cell;
226 }
227
GetExecuteCell()228 monitoring::PercentileSamplerCell* GetExecuteCell() {
229 static monitoring::PercentileSamplerCell* cell =
230 monitoring::PercentileSampler<0>::New(
231 {"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"},
232 GetDefaultPercentiles(), kMaxSamples,
233 monitoring::UnitOfMeasure::kTime)
234 ->GetCell();
235 return cell;
236 }
237
GetExecuteChainedCell()238 monitoring::PercentileSamplerCell* GetExecuteChainedCell() {
239 static monitoring::PercentileSamplerCell* cell =
240 monitoring::PercentileSampler<0>::New(
241 {"/tensorflow/xrt/ops/execute_chained",
242 "Tracks XRTExecuteChained times"},
243 GetDefaultPercentiles(), kMaxSamples,
244 monitoring::UnitOfMeasure::kTime)
245 ->GetCell();
246 return cell;
247 }
248
GetMemoryCompactCell()249 monitoring::PercentileSamplerCell* GetMemoryCompactCell() {
250 static monitoring::PercentileSamplerCell* cell =
251 monitoring::PercentileSampler<0>::New(
252 {"/tensorflow/xrt/memory_manager/compaction",
253 "Tracks XRT memory manager memory compaction times"},
254 GetDefaultPercentiles(), kMaxSamples,
255 monitoring::UnitOfMeasure::kTime)
256 ->GetCell();
257 return cell;
258 }
259
GetTryFreeMemoryCell()260 monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() {
261 static monitoring::PercentileSamplerCell* cell =
262 monitoring::PercentileSampler<0>::New(
263 {"/tensorflow/xrt/memory_manager/try_free_memory",
264 "Tracks XRT memory manager times in trying to "
265 "free memory by swpping device memory to host memory"},
266 GetDefaultPercentiles(), kMaxSamples,
267 monitoring::UnitOfMeasure::kTime)
268 ->GetCell();
269 return cell;
270 }
271
272 } // namespace xrt_metrics
273
CollectMetrics(const xrt::XRTMetricsCollect & metrics)274 xla::StatusOr<xrt::MetricsReport> CollectMetrics(
275 const xrt::XRTMetricsCollect& metrics) {
276 auto* collection_registry = monitoring::CollectionRegistry::Default();
277 monitoring::CollectionRegistry::CollectMetricsOptions options;
278 options.collect_metric_descriptors = false;
279 auto collected_metrics = collection_registry->CollectMetrics(options);
280 xrt::MetricsReport report;
281 for (auto& name_pointset : collected_metrics->point_set_map) {
282 if (IsSelectedMetric(metrics, name_pointset.first)) {
283 TF_RETURN_IF_ERROR(AddMetrics(&report, *name_pointset.second));
284 }
285 }
286 return std::move(report);
287 }
288
289 } // namespace tensorflow
290