/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"

#include <algorithm>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"

namespace tensorflow {
namespace profiler {

namespace {

constexpr int64_t kInvalidStepId = -1;

// Index of the time-sorted memory_profile_snapshots list, and the
// MemoryActivityMetadata proto it contains.
using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>;

bool IsMemoryAllocation(int64_t event_type) {
  return event_type == HostEventType::kMemoryAllocation;
}

bool IsMemoryDeallocation(int64_t event_type) {
  return event_type == HostEventType::kMemoryDeallocation;
}

void UpdateProfileSummary(const MemoryAggregationStats& stats,
                          int64_t time_offset_ps,
                          MemoryProfileSummary* summary) {
  // Update the peak memory usage over allocator's lifetime.
  summary->set_peak_bytes_usage_lifetime(stats.peak_bytes_in_use());
  MemoryAggregationStats* peak_stats = summary->mutable_peak_stats();
  // If we reach (or stay at) peak memory usage within the profiling window,
  // update memory profile summary.
  if (stats.stack_reserved_bytes() + stats.heap_allocated_bytes() >=
      peak_stats->peak_bytes_in_use()) {
    *peak_stats = stats;
    peak_stats->set_peak_bytes_in_use(stats.stack_reserved_bytes() +
                                      stats.heap_allocated_bytes());
    summary->set_peak_stats_time_ps(time_offset_ps);
    summary->set_memory_capacity(stats.stack_reserved_bytes() +
                                 stats.heap_allocated_bytes() +
                                 stats.free_memory_bytes());
  }
}

// Generate memory profile proto by processing host trace XPlane.
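// Each qualifying XEvent becomes one MemoryProfileSnapshot, keyed by the
// allocator ("memory_id") it belongs to. Illustrative sketch of a resulting
// snapshot (the field values below are made up, not from a real trace):
//   time_offset_ps: 123456
//   aggregation_stats { stack_reserved_bytes: 1024 heap_allocated_bytes: 2048
//                       free_memory_bytes: 4096 peak_bytes_in_use: 3072 }
//   activity_metadata { memory_activity: ALLOCATION requested_bytes: 256
//                       allocation_bytes: 256 tf_op_name: "Conv2D"
//                       region_type: "output" data_type: "float"
//                       tensor_shape: "[1,28,28,64]" }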
MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
  XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace);
  MemoryProfile memory_profile;
  // Iterate over all XEvents in the XPlane, and add the XStats to a new
  // MemoryProfileSnapshot if the EventType is kMemoryAllocation or
  // kMemoryDeallocation.
  plane.ForEachLine([&](const XLineVisitor& line) {
    line.ForEachEvent([&](const XEventVisitor& event) {
      int64_t event_type = event.Type().value_or(kUnknownHostEventType);
      if (!(IsMemoryAllocation(event_type) ||
            IsMemoryDeallocation(event_type))) {
        return;
      }

      MemoryAggregationStats stats;
      MemoryActivityMetadata metadata;
      if (IsMemoryAllocation(event_type)) {
        metadata.set_memory_activity(ALLOCATION);
      } else if (IsMemoryDeallocation(event_type)) {
        metadata.set_memory_activity(DEALLOCATION);
      }
      metadata.set_step_id(kInvalidStepId);
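      // The step id set above stays kInvalidStepId unless a kGroupId stat
      // overrides it below; UpdateStepId() later replaces any ids that remain
      // invalid.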

      std::string memory_id;
      event.ForEachStat([&](const XStatVisitor& stat) {
        if (!stat.Type().has_value()) return;
        switch (stat.Type().value()) {
          case StatType::kIndexOnHost:
          case StatType::kDeviceOrdinal:
            memory_id = absl::StrCat(stat.IntValue());
            break;
          case StatType::kAllocatorName:
            memory_id = std::string(stat.StrOrRefValue());
            break;
          case StatType::kBytesReserved:
            stats.set_stack_reserved_bytes(stat.IntValue());
            break;
          case StatType::kBytesAllocated:
            stats.set_heap_allocated_bytes(stat.IntValue());
            break;
          case StatType::kBytesAvailable:
            stats.set_free_memory_bytes(stat.IntValue());
            break;
          case StatType::kFragmentation:
            stats.set_fragmentation(stat.DoubleValue());
            break;
          case StatType::kPeakBytesInUse:
            stats.set_peak_bytes_in_use(stat.IntValue());
            break;
          case StatType::kRequestedBytes:
            metadata.set_requested_bytes(stat.IntValue());
            break;
          case StatType::kAllocationBytes:
            metadata.set_allocation_bytes(stat.IntValue());
            break;
          case StatType::kAddress:
            metadata.set_address(stat.IntValue());
            break;
          case StatType::kTfOp:
            metadata.set_tf_op_name(std::string(stat.StrOrRefValue()));
            break;
          case StatType::kGroupId:
            metadata.set_step_id(stat.IntValue());
            break;
          case StatType::kRegionType:
            metadata.set_region_type(std::string(stat.StrOrRefValue()));
            break;
          case StatType::kDataType:
            metadata.set_data_type(tensorflow::DataTypeString(
                static_cast<tensorflow::DataType>(stat.IntValue())));
            break;
          case StatType::kTensorShapes:
            metadata.set_tensor_shape(std::string(stat.StrOrRefValue()));
            break;
        }
      });

      MemoryProfileSummary* summary =
          (*memory_profile.mutable_memory_profile_per_allocator())[memory_id]
              .mutable_profile_summary();
      UpdateProfileSummary(stats, event.OffsetPs(), summary);

      MemoryProfileSnapshot* snapshot =
          (*memory_profile.mutable_memory_profile_per_allocator())[memory_id]
              .add_memory_profile_snapshots();
      snapshot->set_time_offset_ps(event.OffsetPs());
      *snapshot->mutable_aggregation_stats() = std::move(stats);
      *snapshot->mutable_activity_metadata() = std::move(metadata);
    });
  });
  return memory_profile;
}

// Fix invalid step ids of snapshots at the beginning/end of the profile or at
// the step boundaries. The snapshots with invalid step ids at the beginning get
// 0 for their step ids. Those at the step boundaries or at the end get the
// previous snapshot's step id + 1.
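// For example (an illustrative sequence, not from a real trace): step ids
// [-1, -1, 2, -1, 3, -1] become [0, 0, 2, 3, 3, 4].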
void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) {
  int64_t last_valid_step_id = -1;
  // Snapshots are already sorted in time.
  for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
    DCHECK(snapshot.has_activity_metadata());
    if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) {
      snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1);
    } else {
      last_valid_step_id = snapshot.mutable_activity_metadata()->step_id();
    }
  }
}

// Update the MemoryActivityMetadata for each deallocation event by copying from
// the matching allocation.
void UpdateDeallocation(PerAllocatorMemoryProfile* memory_profile) {
  absl::flat_hash_map<uint64 /*address*/, const MemoryActivityMetadata*>
      addr_metadata_map;
  for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
    // Match the deallocation with previous allocation based on address.
    uint64 address = snapshot.activity_metadata().address();
    if (snapshot.activity_metadata().memory_activity() == DEALLOCATION) {
      if (addr_metadata_map.contains(address)) {
        const MemoryActivityMetadata* alloc_meta = addr_metadata_map[address];
        snapshot.mutable_activity_metadata()->set_tf_op_name(
            alloc_meta->tf_op_name());
        snapshot.mutable_activity_metadata()->set_region_type(
            alloc_meta->region_type());
        snapshot.mutable_activity_metadata()->set_data_type(
            alloc_meta->data_type());
        snapshot.mutable_activity_metadata()->set_tensor_shape(
            alloc_meta->tensor_shape());
        // If further (unexpected) deallocations arrive for the same chunk
        // address, leave their metadata as is (empty or already captured).
        addr_metadata_map.erase(address);
      } else {
        VLOG(2)
            << "Can't find matching memory allocation for this deallocation: "
            << snapshot.DebugString();
      }
    } else if (!addr_metadata_map.contains(address)) {  // Allocation.
      addr_metadata_map[address] = &snapshot.activity_metadata();
    } else {
      VLOG(2) << "There are two allocations recorded for the same address: "
              << address
              << ". The later allocation event is: " << snapshot.DebugString();
    }
  }
  VLOG(2) << "Number of allocations with no matching deallocation: "
          << addr_metadata_map.size();
}

// Return the step id for the peak memory usage data point.
int64 GetPeakMemoryStep(int64_t peak_bytes_profile,
                        const PerAllocatorMemoryProfile* memory_profile) {
  int64_t peak_bytes_profile_step_id = 0;
  for (const auto& snapshot : memory_profile->memory_profile_snapshots()) {
    // Get the step id of the peak memory usage.
    if (peak_bytes_profile ==
        snapshot.aggregation_stats().heap_allocated_bytes() +
            snapshot.aggregation_stats().stack_reserved_bytes()) {
      DCHECK(snapshot.has_activity_metadata());
      peak_bytes_profile_step_id = snapshot.activity_metadata().step_id();
    }
  }
  return peak_bytes_profile_step_id;
}

// Functor that compares (index, metadata) pairs, sorting by allocation bytes
// and requested bytes (descending), then by TF Op name, region type, data
// type, and tensor shape (ascending).
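// The byte counts are negated below so that std::tuple's lexicographic
// operator< yields descending order on the byte fields and ascending order on
// the remaining string fields.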
struct MetadataComparator {
  bool operator()(const IndexMetaPair& a, const IndexMetaPair& b) const {
    const MemoryActivityMetadata* a_meta = a.second;
    const MemoryActivityMetadata* b_meta = b.second;
    DCHECK_NE(a_meta, nullptr);
    DCHECK_NE(b_meta, nullptr);

    auto lhs =
        std::make_tuple(-a_meta->allocation_bytes(), -a_meta->requested_bytes(),
                        a_meta->tf_op_name(), a_meta->region_type(),
                        a_meta->data_type(), a_meta->tensor_shape());
    auto rhs =
        std::make_tuple(-b_meta->allocation_bytes(), -b_meta->requested_bytes(),
                        b_meta->tf_op_name(), b_meta->region_type(),
                        b_meta->data_type(), b_meta->tensor_shape());
    return lhs < rhs;
  }
};

// If applicable, add items into active_allocs vector and special_allocations
// proto for the unmapped memory usage (in heap) and stack reservation at peak.
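// Special allocations receive negative indices (-1, -2, ...) in active_allocs,
// which lets ProcessActiveAllocations() tell them apart from regular snapshot
// indices and map them to special_index entries.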
void InsertSpecialAllocations(int64_t unmapped_allocation_bytes,
                              int64_t step_id,
                              PerAllocatorMemoryProfile* memory_profile,
                              std::vector<IndexMetaPair>* active_allocs) {
  int index = 0;
  if (unmapped_allocation_bytes > 0) {
    MemoryActivityMetadata* special_allocation =
        memory_profile->add_special_allocations();
    special_allocation->set_memory_activity(ALLOCATION);
    special_allocation->set_requested_bytes(unmapped_allocation_bytes);
    special_allocation->set_allocation_bytes(unmapped_allocation_bytes);
    special_allocation->set_address(0);
    special_allocation->set_tf_op_name("unused preallocated device memory");
    special_allocation->set_step_id(step_id);
    special_allocation->set_region_type("persist/dynamic");
    special_allocation->set_data_type(
        tensorflow::DataTypeString(static_cast<tensorflow::DataType>(0)));
    special_allocation->set_tensor_shape("unknown");
    active_allocs->push_back({--index, special_allocation});
  }
  int64_t stack_bytes =
      memory_profile->profile_summary().peak_stats().stack_reserved_bytes();
  if (stack_bytes > 0) {
    MemoryActivityMetadata* special_allocation =
        memory_profile->add_special_allocations();
    special_allocation->set_memory_activity(ALLOCATION);
    special_allocation->set_requested_bytes(stack_bytes);
    special_allocation->set_allocation_bytes(stack_bytes);
    special_allocation->set_address(0);
    special_allocation->set_tf_op_name("stack");
    special_allocation->set_step_id(step_id);
    special_allocation->set_region_type("stack");
    special_allocation->set_data_type(
        tensorflow::DataTypeString(static_cast<tensorflow::DataType>(0)));
    special_allocation->set_tensor_shape("unknown");
    active_allocs->push_back({--index, special_allocation});
  }
}

bool operator==(const IndexMetaPair& a, const IndexMetaPair& b) {
  const MemoryActivityMetadata* a_meta = a.second;
  const MemoryActivityMetadata* b_meta = b.second;
  return a_meta->allocation_bytes() == b_meta->allocation_bytes() &&
         a_meta->requested_bytes() == b_meta->requested_bytes() &&
         a_meta->tf_op_name() == b_meta->tf_op_name() &&
         a_meta->region_type() == b_meta->region_type() &&
         a_meta->data_type() == b_meta->data_type() &&
         a_meta->tensor_shape() == b_meta->tensor_shape();
}

// Generate the memory breakdown table of active allocations at the peak usage
// (within profiling window) and fill each ActiveAllocation proto (i.e. a row).
void ProcessActiveAllocations(int64_t peak_bytes_profile_step_id,
                              PerAllocatorMemoryProfile* memory_profile) {
  int64_t unmapped_allocation_bytes =
      memory_profile->profile_summary().peak_stats().heap_allocated_bytes();
  int64_t unmapped_deallocation_bytes = 0;
  absl::flat_hash_map<int64 /*address*/, IndexMetaPair> active_alloc_map;
  // Only account for the memory activities in the step that includes peak
  // memory usage.
  for (int i = 0; i < memory_profile->memory_profile_snapshots_size(); i++) {
    const auto& snapshot = memory_profile->memory_profile_snapshots().at(i);
    DCHECK(snapshot.has_activity_metadata());
    const MemoryActivityMetadata& metadata = snapshot.activity_metadata();
    if (snapshot.time_offset_ps() >
        memory_profile->profile_summary().peak_stats_time_ps())
      break;
    if (metadata.step_id() != peak_bytes_profile_step_id) continue;

    if (metadata.memory_activity() == ALLOCATION) {
      active_alloc_map[metadata.address()] = {i, &metadata};
      unmapped_allocation_bytes -= metadata.allocation_bytes();
    } else {
      DCHECK_EQ(metadata.memory_activity(), DEALLOCATION);
      if (active_alloc_map.contains(metadata.address())) {
        active_alloc_map.erase(metadata.address());
      } else {
        unmapped_deallocation_bytes += metadata.allocation_bytes();
      }
      unmapped_allocation_bytes += metadata.allocation_bytes();
    }
  }
  // Separate the persistent memory from the memory that was allocated in
  // earlier steps but freed before the peak within this step.
  unmapped_allocation_bytes -= unmapped_deallocation_bytes;

  VLOG(2) << "unmapped_allocation_bytes=" << unmapped_allocation_bytes
          << ", unmapped_deallocation_bytes=" << unmapped_deallocation_bytes;

  // Use pairs of (index, MemoryActivityMetadata*) so that we can sort by the
  // metadata, and the frontend can fetch metadata by indexing the time-sorted
  // snapshots.
  std::vector<IndexMetaPair> active_allocs;
  for (const auto& address_and_index_meta : active_alloc_map) {
    active_allocs.push_back(address_and_index_meta.second);
  }

  InsertSpecialAllocations(unmapped_allocation_bytes,
                           peak_bytes_profile_step_id, memory_profile,
                           &active_allocs);

  std::sort(active_allocs.begin(), active_allocs.end(), MetadataComparator());

  // Fill the sorted active_allocations proto messages at peak memory usage.
  // Merge identical allocations and show occurrences.
  for (int i = 0, end = active_allocs.size(); i < end; i++) {
    ActiveAllocation* allocation = memory_profile->add_active_allocations();
    allocation->set_snapshot_index(active_allocs[i].first);
    if (active_allocs[i].first < 0) {
      allocation->set_special_index(-active_allocs[i].first - 1);
    } else {
      allocation->set_special_index(-1);
    }
    allocation->set_num_occurrences(1);
    const int last_alloc = active_allocs.size() - 1;
    while (i < last_alloc && active_allocs[i] == active_allocs[i + 1]) {
      allocation->set_num_occurrences(allocation->num_occurrences() + 1);
      i++;
    }
  }

  VLOG(2) << "Distinctive active allocation count="
          << memory_profile->active_allocations_size();
}

struct Sample {
  int64 orig_index;  // original index to the snapshot.
  MemoryProfileSnapshot* snapshot;
};

// This function samples max_num_snapshots from snapshots. We first keep the
// snapshots referenced by active_allocations in the samples. After this, if
// there is still room for more samples, we pick more from snapshots into the
// samples. Then, we sort the samples in time (so that they can be correctly
// displayed on the timeline). Finally, we need to adjust the original indices
// (to snapshots) in active_allocations to the new indices in the samples.
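// Note: if active_allocations references more snapshots than
// max_num_snapshots, the excess referenced snapshots are dropped and their
// entries get snapshot_index -1 in the re-mapping step below.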
void SampleSnapshots(
    int64_t max_num_snapshots,
    protobuf::RepeatedPtrField<MemoryProfileSnapshot>* snapshots,
    protobuf::RepeatedPtrField<ActiveAllocation>* active_allocations) {
  if (snapshots->size() <= max_num_snapshots) return;

  std::vector<Sample> samples;

  // First, puts the snapshots referenced by active_allocations in samples[].
  absl::flat_hash_set<int64> allocation_snapshot_indices;
  for (const auto& allocation : *active_allocations) {
    auto orig_index = allocation.snapshot_index();
    if (orig_index < 0) continue;
    allocation_snapshot_indices.insert(orig_index);
    samples.push_back({orig_index, &(*snapshots)[orig_index]});
    if (allocation_snapshot_indices.size() >= max_num_snapshots) break;
  }

  // Second, extracts remaining samples from snapshots.
  int64_t num_samples_remained =
      max_num_snapshots - allocation_snapshot_indices.size();
  if (num_samples_remained > 0) {
    std::vector<Sample> remaining;
    for (int64_t i = 0; i < snapshots->size(); i++) {
      if (allocation_snapshot_indices.contains(i)) continue;
      // snapshots[i] is not yet sampled; put it in remaining[] for further
      // consideration.
      remaining.push_back({i, &(*snapshots)[i]});
    }
    // Moves the num_samples_remained snapshots with least free bytes to the
    // beginning of remaining[].
    absl::c_partial_sort(
        remaining, remaining.begin() + num_samples_remained,
        [](const Sample& a, const Sample& b) {
          return a.snapshot->aggregation_stats().free_memory_bytes() <
                 b.snapshot->aggregation_stats().free_memory_bytes();
        });
    // Copies the first num_samples_remained in remaining[] to samples[].
    for (int64_t i = 0; i < num_samples_remained; i++)
      samples.push_back(remaining[i]);
  }

  // Third, sorts samples[] in ascending order of time_offset_ps.
  absl::c_sort(samples, [](const Sample& a, const Sample& b) {
    return a.snapshot->time_offset_ps() < b.snapshot->time_offset_ps();
  });

  // Fourth, constructs a map from the original snapshot index to samples index.
  absl::flat_hash_map</*original=*/int64, /*new=*/int64> index_map;
  for (int64_t i = 0; i < samples.size(); i++) {
    index_map[samples[i].orig_index] = i;
  }

  // Fifth, changes the original snapshot indices in active_allocations to the
  // sample indices.
  for (auto& allocation : *active_allocations) {
    auto orig_index = allocation.snapshot_index();
    if (orig_index < 0) continue;
    auto new_index = gtl::FindWithDefault(index_map, orig_index, -1);
    allocation.set_snapshot_index(new_index);
  }
  // Sixth, replaces *snapshots with samples[].
  protobuf::RepeatedPtrField<MemoryProfileSnapshot> new_snapshots;
  new_snapshots.Reserve(samples.size());
  for (const auto& sample : samples) {
    *new_snapshots.Add() = std::move(*sample.snapshot);
  }
  *snapshots = std::move(new_snapshots);
}

// Post-process the memory profile to correctly update proto fields, and break
// down peak memory usage for each allocator.
void ProcessMemoryProfileProto(int64_t max_num_snapshots,
                               MemoryProfile* memory_profile) {
  memory_profile->set_num_hosts(1);
  // Add sorted memory ids within memory profile data to the selection list.
  for (const auto& id_and_allocator_profile :
       memory_profile->memory_profile_per_allocator()) {
    if (!id_and_allocator_profile.second.memory_profile_snapshots().empty()) {
      memory_profile->add_memory_ids(id_and_allocator_profile.first);
    }
  }
  absl::c_sort(*memory_profile->mutable_memory_ids());

  for (auto& id_and_allocator_profile :
       *memory_profile->mutable_memory_profile_per_allocator()) {
    PerAllocatorMemoryProfile* allocator_memory_profile =
        &id_and_allocator_profile.second;
    protobuf::RepeatedPtrField<MemoryProfileSnapshot>* snapshots =
        allocator_memory_profile->mutable_memory_profile_snapshots();
    // Sort the memory_profile_snapshots by time_offset_ps (ascending) in proto.
    absl::c_sort(*snapshots, [](const MemoryProfileSnapshot& a,
                                const MemoryProfileSnapshot& b) {
      return a.time_offset_ps() < b.time_offset_ps();
    });

    UpdateStepId(allocator_memory_profile);
    UpdateDeallocation(allocator_memory_profile);

    int64_t peak_step_id =
        GetPeakMemoryStep(allocator_memory_profile->profile_summary()
                              .peak_stats()
                              .peak_bytes_in_use(),
                          allocator_memory_profile);
    ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
    SampleSnapshots(max_num_snapshots, snapshots,
                    allocator_memory_profile->mutable_active_allocations());
  }
}

template <typename Proto>
Status ConvertProtoToJson(const Proto& proto_output, std::string* json_output) {
  protobuf::util::JsonPrintOptions json_options;
  json_options.always_print_primitive_fields = true;
  auto status = protobuf::util::MessageToJsonString(proto_output, json_output,
                                                    json_options);
  if (!status.ok()) {
    // Convert error_msg google::protobuf::StringPiece (or absl::string_view) to
    // tensorflow::StringPiece.
    auto error_msg = status.message();
    return errors::Internal(
        "Could not convert proto to JSON string: ",
        absl::string_view(error_msg.data(), error_msg.length()));
  }
  return Status::OK();
}

}  // namespace

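// Illustrative usage (not part of this file; the XSpace and the snapshot cap
// below are placeholders):
//   const XPlane* host_plane =
//       FindPlaneWithName(xspace, kHostThreadsPlaneName);
//   if (host_plane != nullptr) {
//     MemoryProfile profile = ConvertXPlaneToMemoryProfile(
//         *host_plane, /*max_num_snapshots=*/1000);
//   }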
MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane,
                                           int64_t max_num_snapshots) {
  MemoryProfile memory_profile = GenerateMemoryProfile(&host_plane);
  ProcessMemoryProfileProto(max_num_snapshots, &memory_profile);
  return memory_profile;
}

Status ConvertXSpaceToMemoryProfileJson(const XSpace& xspace,
                                        std::string* json_output) {
  if (const XPlane* host_plane =
          FindPlaneWithName(xspace, kHostThreadsPlaneName)) {
    MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
    TF_RETURN_IF_ERROR(ConvertProtoToJson(memory_profile, json_output));
  }
  return Status::OK();
}

}  // namespace profiler
}  // namespace tensorflow