• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xrt/xrt_metrics.h"
17 
18 #include "tensorflow/core/lib/monitoring/collection_registry.h"
19 #include "tensorflow/core/platform/regexp.h"
20 
21 namespace tensorflow {
22 namespace {
23 
// Sample-count argument handed to every PercentileSampler created in this
// file.
constexpr size_t kMaxSamples = 1024;
25 
// Returns the list of percentile levels used by the samplers defined in this
// file. A fresh vector is built on every call.
std::vector<double> GetDefaultPercentiles() {
  static constexpr double kLevels[] = {25.0, 50.0, 80.0, 90.0, 95.0, 99.0};
  return std::vector<double>(std::begin(kLevels), std::end(kLevels));
}
29 
IsSelectedMetric(const xrt::XRTMetricsCollect & metrics,const string & name)30 bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics,
31                       const string& name) {
32   if (metrics.metrics_regex_size() == 0) {
33     return true;
34   }
35   for (auto& metric_regex : metrics.metrics_regex()) {
36     if (RE2::FullMatch(name, metric_regex)) {
37       return true;
38     }
39   }
40   return false;
41 }
42 
SetUnitOfMeasure(xrt::MetricValues * metrics,monitoring::UnitOfMeasure unit_of_measure)43 void SetUnitOfMeasure(xrt::MetricValues* metrics,
44                       monitoring::UnitOfMeasure unit_of_measure) {
45   switch (unit_of_measure) {
46     case monitoring::UnitOfMeasure::kNumber:
47       metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
48       break;
49     case monitoring::UnitOfMeasure::kTime:
50       metrics->set_unit_of_measure(xrt::MetricValues::TIME);
51       break;
52     case monitoring::UnitOfMeasure::kBytes:
53       metrics->set_unit_of_measure(xrt::MetricValues::BYTES);
54       break;
55   }
56 }
57 
AddMetrics(xrt::MetricsReport * report,const monitoring::PointSet & point_set)58 Status AddMetrics(xrt::MetricsReport* report,
59                   const monitoring::PointSet& point_set) {
60   for (auto& point : point_set.points) {
61     xrt::MetricValues* metrics = report->add_metrics();
62     metrics->set_name(point_set.metric_name);
63     if (point->value_type == monitoring::ValueType::kPercentiles) {
64       xrt::Percentiles* percentiles = metrics->mutable_percentiles_value();
65       SetUnitOfMeasure(metrics, point->percentiles_value.unit_of_measure);
66       percentiles->set_start_nstime(point->percentiles_value.start_nstime);
67       percentiles->set_end_nstime(point->percentiles_value.end_nstime);
68       percentiles->set_min_value(point->percentiles_value.min_value);
69       percentiles->set_max_value(point->percentiles_value.max_value);
70       percentiles->set_mean(point->percentiles_value.mean);
71       percentiles->set_stddev(point->percentiles_value.stddev);
72       percentiles->set_num_samples(point->percentiles_value.num_samples);
73       percentiles->set_total_samples(point->percentiles_value.total_samples);
74       percentiles->set_accumulator(point->percentiles_value.accumulator);
75       for (auto& pct_point : point->percentiles_value.points) {
76         xrt::Percentiles::Point* xpoint = percentiles->add_points();
77         xpoint->set_percentile(pct_point.percentile);
78         xpoint->set_value(pct_point.value);
79       }
80     } else if (point->value_type == monitoring::ValueType::kInt64) {
81       metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
82       metrics->set_int64_value(point->int64_value);
83     }
84   }
85   return Status::OK();
86 }
87 
88 }  // namespace
89 
90 namespace xrt_metrics {
91 
GetAllocateCell()92 monitoring::PercentileSamplerCell* GetAllocateCell() {
93   static monitoring::PercentileSamplerCell* cell =
94       monitoring::PercentileSampler<0>::New(
95           {"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"},
96           GetDefaultPercentiles(), kMaxSamples,
97           monitoring::UnitOfMeasure::kTime)
98           ->GetCell();
99   return cell;
100 }
101 
GetAllocateUninitializedCell()102 monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() {
103   static monitoring::PercentileSamplerCell* cell =
104       monitoring::PercentileSampler<0>::New(
105           {"/tensorflow/xrt/ops/allocate_uninitialized",
106            "Tracks XRTAllocateUninitialized times"},
107           GetDefaultPercentiles(), kMaxSamples,
108           monitoring::UnitOfMeasure::kTime)
109           ->GetCell();
110   return cell;
111 }
112 
GetAllocateFromTensorCell()113 monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() {
114   static monitoring::PercentileSamplerCell* cell =
115       monitoring::PercentileSampler<0>::New(
116           {"/tensorflow/xrt/ops/allocate_from_tensor",
117            "Tracks XRTAllocateFromTensor times"},
118           GetDefaultPercentiles(), kMaxSamples,
119           monitoring::UnitOfMeasure::kTime)
120           ->GetCell();
121   return cell;
122 }
123 
GetSubTupleCell()124 monitoring::PercentileSamplerCell* GetSubTupleCell() {
125   static monitoring::PercentileSamplerCell* cell =
126       monitoring::PercentileSampler<0>::New(
127           {"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"},
128           GetDefaultPercentiles(), kMaxSamples,
129           monitoring::UnitOfMeasure::kTime)
130           ->GetCell();
131   return cell;
132 }
133 
GetMakeTupleCell()134 monitoring::PercentileSamplerCell* GetMakeTupleCell() {
135   static monitoring::PercentileSamplerCell* cell =
136       monitoring::PercentileSampler<0>::New(
137           {"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"},
138           GetDefaultPercentiles(), kMaxSamples,
139           monitoring::UnitOfMeasure::kTime)
140           ->GetCell();
141   return cell;
142 }
143 
GetReadLiteralCell()144 monitoring::PercentileSamplerCell* GetReadLiteralCell() {
145   static monitoring::PercentileSamplerCell* cell =
146       monitoring::PercentileSampler<0>::New(
147           {"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"},
148           GetDefaultPercentiles(), kMaxSamples,
149           monitoring::UnitOfMeasure::kTime)
150           ->GetCell();
151   return cell;
152 }
153 
GetReadToTensorCell()154 monitoring::PercentileSamplerCell* GetReadToTensorCell() {
155   static monitoring::PercentileSamplerCell* cell =
156       monitoring::PercentileSampler<0>::New(
157           {"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"},
158           GetDefaultPercentiles(), kMaxSamples,
159           monitoring::UnitOfMeasure::kTime)
160           ->GetCell();
161   return cell;
162 }
163 
GetWriteLiteralCell()164 monitoring::PercentileSamplerCell* GetWriteLiteralCell() {
165   static monitoring::PercentileSamplerCell* cell =
166       monitoring::PercentileSampler<0>::New(
167           {"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"},
168           GetDefaultPercentiles(), kMaxSamples,
169           monitoring::UnitOfMeasure::kTime)
170           ->GetCell();
171   return cell;
172 }
173 
GetReleaseAllocationCell()174 monitoring::PercentileSamplerCell* GetReleaseAllocationCell() {
175   static monitoring::PercentileSamplerCell* cell =
176       monitoring::PercentileSampler<0>::New(
177           {"/tensorflow/xrt/ops/release_allocation",
178            "Tracks XRTReleaseAllocation times"},
179           GetDefaultPercentiles(), kMaxSamples,
180           monitoring::UnitOfMeasure::kTime)
181           ->GetCell();
182   return cell;
183 }
184 
GetReleaseAllAllocationsCell()185 monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() {
186   static monitoring::PercentileSamplerCell* cell =
187       monitoring::PercentileSampler<0>::New(
188           {"/tensorflow/xrt/ops/release_all_allocations",
189            "Tracks XRTReleaseAllAllocations times"},
190           GetDefaultPercentiles(), kMaxSamples,
191           monitoring::UnitOfMeasure::kTime)
192           ->GetCell();
193   return cell;
194 }
195 
GetCompactAllocationsCell()196 monitoring::PercentileSamplerCell* GetCompactAllocationsCell() {
197   static monitoring::PercentileSamplerCell* cell =
198       monitoring::PercentileSampler<0>::New(
199           {"/tensorflow/xrt/ops/compact_allocations",
200            "Tracks XRTCompactAllocations times"},
201           GetDefaultPercentiles(), kMaxSamples,
202           monitoring::UnitOfMeasure::kTime)
203           ->GetCell();
204   return cell;
205 }
206 
GetCompileCell()207 monitoring::PercentileSamplerCell* GetCompileCell() {
208   static monitoring::PercentileSamplerCell* cell =
209       monitoring::PercentileSampler<0>::New(
210           {"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"},
211           GetDefaultPercentiles(), kMaxSamples,
212           monitoring::UnitOfMeasure::kTime)
213           ->GetCell();
214   return cell;
215 }
216 
GetReleaseCompilationCell()217 monitoring::PercentileSamplerCell* GetReleaseCompilationCell() {
218   static monitoring::PercentileSamplerCell* cell =
219       monitoring::PercentileSampler<0>::New(
220           {"/tensorflow/xrt/ops/release_compilation",
221            "Tracks XRTReleaseCompilationRef times"},
222           GetDefaultPercentiles(), kMaxSamples,
223           monitoring::UnitOfMeasure::kTime)
224           ->GetCell();
225   return cell;
226 }
227 
GetExecuteCell()228 monitoring::PercentileSamplerCell* GetExecuteCell() {
229   static monitoring::PercentileSamplerCell* cell =
230       monitoring::PercentileSampler<0>::New(
231           {"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"},
232           GetDefaultPercentiles(), kMaxSamples,
233           monitoring::UnitOfMeasure::kTime)
234           ->GetCell();
235   return cell;
236 }
237 
GetExecuteChainedCell()238 monitoring::PercentileSamplerCell* GetExecuteChainedCell() {
239   static monitoring::PercentileSamplerCell* cell =
240       monitoring::PercentileSampler<0>::New(
241           {"/tensorflow/xrt/ops/execute_chained",
242            "Tracks XRTExecuteChained times"},
243           GetDefaultPercentiles(), kMaxSamples,
244           monitoring::UnitOfMeasure::kTime)
245           ->GetCell();
246   return cell;
247 }
248 
GetMemoryCompactCell()249 monitoring::PercentileSamplerCell* GetMemoryCompactCell() {
250   static monitoring::PercentileSamplerCell* cell =
251       monitoring::PercentileSampler<0>::New(
252           {"/tensorflow/xrt/memory_manager/compaction",
253            "Tracks XRT memory manager memory compaction times"},
254           GetDefaultPercentiles(), kMaxSamples,
255           monitoring::UnitOfMeasure::kTime)
256           ->GetCell();
257   return cell;
258 }
259 
GetTryFreeMemoryCell()260 monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() {
261   static monitoring::PercentileSamplerCell* cell =
262       monitoring::PercentileSampler<0>::New(
263           {"/tensorflow/xrt/memory_manager/try_free_memory",
264            "Tracks XRT memory manager times in trying to "
265            "free memory by swpping device memory to host memory"},
266           GetDefaultPercentiles(), kMaxSamples,
267           monitoring::UnitOfMeasure::kTime)
268           ->GetCell();
269   return cell;
270 }
271 
272 }  // namespace xrt_metrics
273 
CollectMetrics(const xrt::XRTMetricsCollect & metrics)274 xla::StatusOr<xrt::MetricsReport> CollectMetrics(
275     const xrt::XRTMetricsCollect& metrics) {
276   auto* collection_registry = monitoring::CollectionRegistry::Default();
277   monitoring::CollectionRegistry::CollectMetricsOptions options;
278   options.collect_metric_descriptors = false;
279   auto collected_metrics = collection_registry->CollectMetrics(options);
280   xrt::MetricsReport report;
281   for (auto& name_pointset : collected_metrics->point_set_map) {
282     if (IsSelectedMetric(metrics, name_pointset.first)) {
283       TF_RETURN_IF_ERROR(AddMetrics(&report, *name_pointset.second));
284     }
285   }
286   return std::move(report);
287 }
288 
289 }  // namespace tensorflow
290