/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"

#include <algorithm>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"

namespace tensorflow {
namespace profiler {

namespace {

constexpr int64_t kInvalidStepId = -1;

// Index of the time-sorted memory_profile_snapshots list, and the
// MemoryActivityMetadata proto it contains.
using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>;

bool IsMemoryAllocation(int64_t event_type) {
  return event_type == HostEventType::kMemoryAllocation;
}

bool IsMemoryDeallocation(int64_t event_type) {
  return event_type == HostEventType::kMemoryDeallocation;
}

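// Updates the per-allocator profile summary from one snapshot's aggregation
// stats. Within the profiling window, "peak" means the largest sum of
// stack_reserved_bytes and heap_allocated_bytes seen so far; memory_capacity
// is that in-use total plus the free bytes reported at the same moment.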
void UpdateProfileSummary(const MemoryAggregationStats& stats,
                          int64_t time_offset_ps,
                          MemoryProfileSummary* summary) {
  // Update the peak memory usage over allocator's lifetime.
  summary->set_peak_bytes_usage_lifetime(stats.peak_bytes_in_use());
  MemoryAggregationStats* peak_stats = summary->mutable_peak_stats();
  // If we reach (or stay at) peak memory usage within the profiling window,
  // update memory profile summary.
  if (stats.stack_reserved_bytes() + stats.heap_allocated_bytes() >=
      peak_stats->peak_bytes_in_use()) {
    *peak_stats = stats;
    peak_stats->set_peak_bytes_in_use(stats.stack_reserved_bytes() +
                                      stats.heap_allocated_bytes());
    summary->set_peak_stats_time_ps(time_offset_ps);
    summary->set_memory_capacity(stats.stack_reserved_bytes() +
                                 stats.heap_allocated_bytes() +
                                 stats.free_memory_bytes());
  }
}

// Generate memory profile proto by processing host trace XPlane.
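// Each memory event is filed under a memory_id key, which is taken from either
// the host-index / device-ordinal stat or the allocator-name stat on the
// event, so the resulting profile is broken down per allocator.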
MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
  XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace);
  MemoryProfile memory_profile;
  // Iterate over all XEvents in the XPlane, and add the XStats to a new
  // MemoryProfileSnapshot if the EventType is kMemoryAllocation or
  // kMemoryDeallocation.
  plane.ForEachLine([&](const XLineVisitor& line) {
    line.ForEachEvent([&](const XEventVisitor& event) {
      int64_t event_type = event.Type().value_or(kUnknownHostEventType);
      if (!(IsMemoryAllocation(event_type) ||
            IsMemoryDeallocation(event_type))) {
        return;
      }

      MemoryAggregationStats stats;
      MemoryActivityMetadata metadata;
      if (IsMemoryAllocation(event_type)) {
        metadata.set_memory_activity(ALLOCATION);
      } else if (IsMemoryDeallocation(event_type)) {
        metadata.set_memory_activity(DEALLOCATION);
      }
      metadata.set_step_id(kInvalidStepId);

      std::string memory_id;
      event.ForEachStat([&](const XStatVisitor& stat) {
        if (!stat.Type().has_value()) return;
        switch (stat.Type().value()) {
          case StatType::kIndexOnHost:
          case StatType::kDeviceOrdinal:
            memory_id = absl::StrCat(stat.IntValue());
            break;
          case StatType::kAllocatorName:
            memory_id = std::string(stat.StrOrRefValue());
            break;
          case StatType::kBytesReserved:
            stats.set_stack_reserved_bytes(stat.IntValue());
            break;
          case StatType::kBytesAllocated:
            stats.set_heap_allocated_bytes(stat.IntValue());
            break;
          case StatType::kBytesAvailable:
            stats.set_free_memory_bytes(stat.IntValue());
            break;
          case StatType::kFragmentation:
            stats.set_fragmentation(stat.DoubleValue());
            break;
          case StatType::kPeakBytesInUse:
            stats.set_peak_bytes_in_use(stat.IntValue());
            break;
          case StatType::kRequestedBytes:
            metadata.set_requested_bytes(stat.IntValue());
            break;
          case StatType::kAllocationBytes:
            metadata.set_allocation_bytes(stat.IntValue());
            break;
          case StatType::kAddress:
            metadata.set_address(stat.IntValue());
            break;
          case StatType::kTfOp:
            metadata.set_tf_op_name(std::string(stat.StrOrRefValue()));
            break;
          case StatType::kGroupId:
            metadata.set_step_id(stat.IntValue());
            break;
          case StatType::kRegionType:
            metadata.set_region_type(std::string(stat.StrOrRefValue()));
            break;
          case StatType::kDataType:
            metadata.set_data_type(tensorflow::DataTypeString(
                static_cast<tensorflow::DataType>(stat.IntValue())));
            break;
          case StatType::kTensorShapes:
            metadata.set_tensor_shape(std::string(stat.StrOrRefValue()));
            break;
        }
      });

      MemoryProfileSummary* summary =
          (*memory_profile.mutable_memory_profile_per_allocator())[memory_id]
              .mutable_profile_summary();
      UpdateProfileSummary(stats, event.OffsetPs(), summary);

      MemoryProfileSnapshot* snapshot =
          (*memory_profile.mutable_memory_profile_per_allocator())[memory_id]
              .add_memory_profile_snapshots();
      snapshot->set_time_offset_ps(event.OffsetPs());
      *snapshot->mutable_aggregation_stats() = std::move(stats);
      *snapshot->mutable_activity_metadata() = std::move(metadata);
    });
  });
  return memory_profile;
}

// Fix invalid step ids of snapshots at the beginning/end of the profile or at
// the step boundaries. Snapshots with invalid step ids at the beginning of the
// profile get step id 0. Those at step boundaries or at the end get the last
// valid step id seen so far plus 1.
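// For example, metadata step ids of [-1, -1, 3, -1, -1, 7, -1] are rewritten
// to [0, 0, 3, 4, 4, 7, 8].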
void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) {
  int64_t last_valid_step_id = -1;
  // Snapshots are already sorted in time.
  for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
    DCHECK(snapshot.has_activity_metadata());
    if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) {
      snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1);
    } else {
      last_valid_step_id = snapshot.mutable_activity_metadata()->step_id();
    }
  }
}

// Update the MemoryActivityMetadata for each deallocation event by copying
// from the matching allocation.
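// The address-to-metadata map keeps only the currently live allocation per
// chunk address; once a deallocation is matched, the entry is erased so that
// a later (unexpected) deallocation of the same address is left untouched.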
void UpdateDeallocation(PerAllocatorMemoryProfile* memory_profile) {
  absl::flat_hash_map<uint64 /*address*/, const MemoryActivityMetadata*>
      addr_metadata_map;
  for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
    // Match the deallocation with previous allocation based on address.
    uint64 address = snapshot.activity_metadata().address();
    if (snapshot.activity_metadata().memory_activity() == DEALLOCATION) {
      if (addr_metadata_map.contains(address)) {
        const MemoryActivityMetadata* alloc_meta = addr_metadata_map[address];
        snapshot.mutable_activity_metadata()->set_tf_op_name(
            alloc_meta->tf_op_name());
        snapshot.mutable_activity_metadata()->set_region_type(
            alloc_meta->region_type());
        snapshot.mutable_activity_metadata()->set_data_type(
            alloc_meta->data_type());
        snapshot.mutable_activity_metadata()->set_tensor_shape(
            alloc_meta->tensor_shape());
        // In case of following (unexpected) deallocations to the same chunk
        // address, leave the metadata as it is (empty or already captured).
        addr_metadata_map.erase(address);
      } else {
        VLOG(2)
            << "Can't find matching memory allocation for this deallocation: "
            << snapshot.DebugString();
      }
    } else if (!addr_metadata_map.contains(address)) {  // Allocation.
      addr_metadata_map[address] = &snapshot.activity_metadata();
    } else {
      VLOG(2) << "There are two allocations recorded for the same address: "
              << address
              << ". The later allocation event is: " << snapshot.DebugString();
    }
  }
  VLOG(2) << "Number of allocations that cannot find matching deallocations: "
          << addr_metadata_map.size();
}

// Return the step id for the peak memory usage data point.
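// If several snapshots hit the same peak in-use total, the step id of the last
// one (in time order) wins, since the loop keeps overwriting it.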
int64 GetPeakMemoryStep(int64_t peak_bytes_profile,
                        const PerAllocatorMemoryProfile* memory_profile) {
  int64_t peak_bytes_profile_step_id = 0;
  for (const auto& snapshot : memory_profile->memory_profile_snapshots()) {
    // Get the step id of the peak memory usage.
    if (peak_bytes_profile ==
        snapshot.aggregation_stats().heap_allocated_bytes() +
            snapshot.aggregation_stats().stack_reserved_bytes()) {
      DCHECK(snapshot.has_activity_metadata());
      peak_bytes_profile_step_id = snapshot.activity_metadata().step_id();
    }
  }
  return peak_bytes_profile_step_id;
}

// Functor that compares (index, metadata) pairs to sort in the order of
// allocation bytes and requested bytes (descending), as well as TF Op name,
// region type, data type, and tensor shape (ascending).
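// The byte counts are negated so that std::tuple's lexicographic operator<
// yields descending order for them while keeping ascending order for the
// string fields.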
struct MetadataComparator {
  bool operator()(const IndexMetaPair& a, const IndexMetaPair& b) const {
    const MemoryActivityMetadata* a_meta = a.second;
    const MemoryActivityMetadata* b_meta = b.second;
    DCHECK_NE(a_meta, nullptr);
    DCHECK_NE(b_meta, nullptr);

    auto lhs =
        std::make_tuple(-a_meta->allocation_bytes(), -a_meta->requested_bytes(),
                        a_meta->tf_op_name(), a_meta->region_type(),
                        a_meta->data_type(), a_meta->tensor_shape());
    auto rhs =
        std::make_tuple(-b_meta->allocation_bytes(), -b_meta->requested_bytes(),
                        b_meta->tf_op_name(), b_meta->region_type(),
                        b_meta->data_type(), b_meta->tensor_shape());
    return lhs < rhs;
  }
};

// If applicable, add items to the active_allocs vector and the
// special_allocations proto for the unmapped memory usage (in heap) and the
// stack reservation at peak.
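// Special allocations get synthetic negative indices (-1, -2) so they can be
// told apart from real snapshot indices; ProcessActiveAllocations later maps
// them to special_index values 0 and 1.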
void InsertSpecialAllocations(int64_t unmapped_allocation_bytes,
                              int64_t step_id,
                              PerAllocatorMemoryProfile* memory_profile,
                              std::vector<IndexMetaPair>* active_allocs) {
  int index = 0;
  if (unmapped_allocation_bytes > 0) {
    MemoryActivityMetadata* special_allocation =
        memory_profile->add_special_allocations();
    special_allocation->set_memory_activity(ALLOCATION);
    special_allocation->set_requested_bytes(unmapped_allocation_bytes);
    special_allocation->set_allocation_bytes(unmapped_allocation_bytes);
    special_allocation->set_address(0);
    special_allocation->set_tf_op_name("unused preallocated device memory");
    special_allocation->set_step_id(step_id);
    special_allocation->set_region_type("persist/dynamic");
    special_allocation->set_data_type(
        tensorflow::DataTypeString(static_cast<tensorflow::DataType>(0)));
    special_allocation->set_tensor_shape("unknown");
    active_allocs->push_back({--index, special_allocation});
  }
  int64_t stack_bytes =
      memory_profile->profile_summary().peak_stats().stack_reserved_bytes();
  if (stack_bytes > 0) {
    MemoryActivityMetadata* special_allocation =
        memory_profile->add_special_allocations();
    special_allocation->set_memory_activity(ALLOCATION);
    special_allocation->set_requested_bytes(stack_bytes);
    special_allocation->set_allocation_bytes(stack_bytes);
    special_allocation->set_address(0);
    special_allocation->set_tf_op_name("stack");
    special_allocation->set_step_id(step_id);
    special_allocation->set_region_type("stack");
    special_allocation->set_data_type(
        tensorflow::DataTypeString(static_cast<tensorflow::DataType>(0)));
    special_allocation->set_tensor_shape("unknown");
    active_allocs->push_back({--index, special_allocation});
  }
}

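// Two (index, metadata) pairs are treated as identical when all metadata
// fields shown in the breakdown table match; the snapshot index itself is
// ignored so that equal allocations can be merged and counted below.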
bool operator==(const IndexMetaPair& a, const IndexMetaPair& b) {
  const MemoryActivityMetadata* a_meta = a.second;
  const MemoryActivityMetadata* b_meta = b.second;
  return a_meta->allocation_bytes() == b_meta->allocation_bytes() &&
         a_meta->requested_bytes() == b_meta->requested_bytes() &&
         a_meta->tf_op_name() == b_meta->tf_op_name() &&
         a_meta->region_type() == b_meta->region_type() &&
         a_meta->data_type() == b_meta->data_type() &&
         a_meta->tensor_shape() == b_meta->tensor_shape();
}

// Generate the memory breakdown table of active allocations at the peak usage
// (within the profiling window) and fill each ActiveAllocation proto (i.e. a
// row).
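// Only events in the step that contains the peak are replayed. Whatever is
// left in unmapped_allocation_bytes afterwards is peak heap usage that cannot
// be attributed to any allocation event seen in that step (e.g. memory
// allocated in earlier steps or before the profiling window); it is surfaced
// below as a special allocation.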
void ProcessActiveAllocations(int64_t peak_bytes_profile_step_id,
                              PerAllocatorMemoryProfile* memory_profile) {
  int64_t unmapped_allocation_bytes =
      memory_profile->profile_summary().peak_stats().heap_allocated_bytes();
  int64_t unmapped_deallocation_bytes = 0;
  absl::flat_hash_map<int64 /*address*/, IndexMetaPair> active_alloc_map;
  // Only account for the memory activities in the step that includes peak
  // memory usage.
  for (int i = 0; i < memory_profile->memory_profile_snapshots_size(); i++) {
    const auto& snapshot = memory_profile->memory_profile_snapshots().at(i);
    DCHECK(snapshot.has_activity_metadata());
    const MemoryActivityMetadata& metadata = snapshot.activity_metadata();
    if (snapshot.time_offset_ps() >
        memory_profile->profile_summary().peak_stats_time_ps())
      break;
    if (metadata.step_id() != peak_bytes_profile_step_id) continue;

    if (metadata.memory_activity() == ALLOCATION) {
      active_alloc_map[metadata.address()] = {i, &metadata};
      unmapped_allocation_bytes -= metadata.allocation_bytes();
    } else {
      DCHECK_EQ(metadata.memory_activity(), DEALLOCATION);
      if (active_alloc_map.contains(metadata.address())) {
        active_alloc_map.erase(metadata.address());
      } else {
        unmapped_deallocation_bytes += metadata.allocation_bytes();
      }
      unmapped_allocation_bytes += metadata.allocation_bytes();
    }
  }
  // This separates persistent memory from memory that was allocated in a
  // previous step and freed during this step.
  unmapped_allocation_bytes -= unmapped_deallocation_bytes;

  VLOG(2) << "unmapped_allocation_bytes=" << unmapped_allocation_bytes
          << ", unmapped_deallocation_bytes=" << unmapped_deallocation_bytes;

  // Using a pair of (index, MemoryActivityMetadata*) so that we can sort by
  // the metadata, and fetch metadata by indexing the time-sorted snapshots in
  // the frontend.
  std::vector<IndexMetaPair> active_allocs;
  for (const auto& address_and_index_meta : active_alloc_map) {
    active_allocs.push_back(address_and_index_meta.second);
  }

  InsertSpecialAllocations(unmapped_allocation_bytes,
                           peak_bytes_profile_step_id, memory_profile,
                           &active_allocs);

  std::sort(active_allocs.begin(), active_allocs.end(), MetadataComparator());

  // Fill the sorted active_allocations proto messages at peak memory usage.
  // Merge identical allocations and show occurrences.
  for (int i = 0, end = active_allocs.size(); i < end; i++) {
    ActiveAllocation* allocation = memory_profile->add_active_allocations();
    allocation->set_snapshot_index(active_allocs[i].first);
    if (active_allocs[i].first < 0) {
      allocation->set_special_index(-active_allocs[i].first - 1);
    } else {
      allocation->set_special_index(-1);
    }
    allocation->set_num_occurrences(1);
    const int last_alloc = active_allocs.size() - 1;
    while (i < last_alloc && active_allocs[i] == active_allocs[i + 1]) {
      allocation->set_num_occurrences(allocation->num_occurrences() + 1);
      i++;
    }
  }

  VLOG(2) << "Distinctive active allocation count="
          << memory_profile->active_allocations_size();
}

struct Sample {
  int64 orig_index;  // original index to the snapshot.
  MemoryProfileSnapshot* snapshot;
};

// This function samples max_num_snapshots from snapshots. We first keep the
// snapshots referenced by active_allocations in the samples. After this, if
// there is still room for more samples, we pick more snapshots into the
// samples. Then, we sort the samples in time (so that they can be correctly
// displayed on the timeline). Finally, we adjust the original indices
// (to snapshots) in active_allocations to the new indices in the samples.
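// Note: if active_allocations reference more snapshots than max_num_snapshots
// allows, the excess references end up pointing at snapshots that were not
// sampled; their snapshot_index is rewritten to -1 in the final pass below.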
void SampleSnapshots(
    int64_t max_num_snapshots,
    protobuf::RepeatedPtrField<MemoryProfileSnapshot>* snapshots,
    protobuf::RepeatedPtrField<ActiveAllocation>* active_allocations) {
  if (snapshots->size() <= max_num_snapshots) return;

  std::vector<Sample> samples;

  // First, puts the snapshots referenced by active_allocations in samples[].
  absl::flat_hash_set<int64> allocation_snapshot_indices;
  for (const auto& allocation : *active_allocations) {
    auto orig_index = allocation.snapshot_index();
    if (orig_index < 0) continue;
    allocation_snapshot_indices.insert(orig_index);
    samples.push_back({orig_index, &(*snapshots)[orig_index]});
    if (allocation_snapshot_indices.size() >= max_num_snapshots) break;
  }

  // Second, extracts remaining samples from snapshots.
  int64_t num_samples_remained =
      max_num_snapshots - allocation_snapshot_indices.size();
  if (num_samples_remained > 0) {
    std::vector<Sample> remaining;
    for (int64_t i = 0; i < snapshots->size(); i++) {
      if (allocation_snapshot_indices.contains(i)) continue;
      // snapshots[i] is not yet sampled; put it in remaining[] for further
      // consideration.
      remaining.push_back({i, &(*snapshots)[i]});
    }
    // Moves the num_samples_remained snapshots with least free bytes to the
    // beginning of remaining[].
    absl::c_partial_sort(
        remaining, remaining.begin() + num_samples_remained,
        [](const Sample& a, const Sample& b) {
          return a.snapshot->aggregation_stats().free_memory_bytes() <
                 b.snapshot->aggregation_stats().free_memory_bytes();
        });
    // Copies the first num_samples_remained in remaining[] to samples[].
    for (int64_t i = 0; i < num_samples_remained; i++)
      samples.push_back(remaining[i]);
  }

  // Third, sorts samples[] in ascending order of time_offset_ps.
  absl::c_sort(samples, [](const Sample& a, const Sample& b) {
    return a.snapshot->time_offset_ps() < b.snapshot->time_offset_ps();
  });

  // Fourth, constructs a map from the original snapshot index to the sample
  // index.
  absl::flat_hash_map</*original=*/int64, /*new=*/int64> index_map;
  for (int64_t i = 0; i < samples.size(); i++) {
    index_map[samples[i].orig_index] = i;
  }

  // Fifth, changes the original snapshot indices in active_allocations to the
  // sample indices.
  for (auto& allocation : *active_allocations) {
    auto orig_index = allocation.snapshot_index();
    if (orig_index < 0) continue;
    auto new_index = gtl::FindWithDefault(index_map, orig_index, -1);
    allocation.set_snapshot_index(new_index);
  }

  // Sixth, replaces *snapshots with samples[].
  protobuf::RepeatedPtrField<MemoryProfileSnapshot> new_snapshots;
  new_snapshots.Reserve(samples.size());
  for (const auto& sample : samples) {
    *new_snapshots.Add() = std::move(*sample.snapshot);
  }
  *snapshots = std::move(new_snapshots);
}

// Post-process the memory profile to correctly update proto fields, and break
// down peak memory usage for each allocator.
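// The order of the passes matters: snapshots must be time-sorted before step
// ids and deallocation metadata are fixed up, and sampling has to run last
// because it rewrites the snapshot indices stored in active_allocations.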
void ProcessMemoryProfileProto(int64_t max_num_snapshots,
                               MemoryProfile* memory_profile) {
  memory_profile->set_num_hosts(1);
  // Add sorted memory ids within memory profile data to the selection list.
  for (const auto& id_and_allocator_profile :
       memory_profile->memory_profile_per_allocator()) {
    if (!id_and_allocator_profile.second.memory_profile_snapshots().empty()) {
      memory_profile->add_memory_ids(id_and_allocator_profile.first);
    }
  }
  absl::c_sort(*memory_profile->mutable_memory_ids());

  for (auto& id_and_allocator_profile :
       *memory_profile->mutable_memory_profile_per_allocator()) {
    PerAllocatorMemoryProfile* allocator_memory_profile =
        &id_and_allocator_profile.second;
    protobuf::RepeatedPtrField<MemoryProfileSnapshot>* snapshots =
        allocator_memory_profile->mutable_memory_profile_snapshots();
    // Sort the memory_profile_snapshots by time_offset_ps (ascending) in proto.
    absl::c_sort(*snapshots, [](const MemoryProfileSnapshot& a,
                                const MemoryProfileSnapshot& b) {
      return a.time_offset_ps() < b.time_offset_ps();
    });

    UpdateStepId(allocator_memory_profile);
    UpdateDeallocation(allocator_memory_profile);

    int64_t peak_step_id =
        GetPeakMemoryStep(allocator_memory_profile->profile_summary()
                              .peak_stats()
                              .peak_bytes_in_use(),
                          allocator_memory_profile);
    ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
    SampleSnapshots(max_num_snapshots, snapshots,
                    allocator_memory_profile->mutable_active_allocations());
  }
}

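// Converts a proto message to a JSON string; always_print_primitive_fields
// ensures that primitive fields appear in the output even when they hold
// their default values.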
template <typename Proto>
Status ConvertProtoToJson(const Proto& proto_output, std::string* json_output) {
  protobuf::util::JsonPrintOptions json_options;
  json_options.always_print_primitive_fields = true;
  auto status = protobuf::util::MessageToJsonString(proto_output, json_output,
                                                    json_options);
  if (!status.ok()) {
    // Convert error_msg google::protobuf::StringPiece (or absl::string_view)
    // to tensorflow::StringPiece.
    auto error_msg = status.message();
    return errors::Internal(
        "Could not convert proto to JSON string: ",
        absl::string_view(error_msg.data(), error_msg.length()));
  }
  return Status::OK();
}

}  // namespace

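// Usage sketch (illustrative only; the snapshot cap of 1000 is an arbitrary
// example value, not a documented default):
//   if (const XPlane* host_plane =
//           FindPlaneWithName(xspace, kHostThreadsPlaneName)) {
//     MemoryProfile profile = ConvertXPlaneToMemoryProfile(*host_plane, 1000);
//   }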
MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane,
                                           int64_t max_num_snapshots) {
  MemoryProfile memory_profile = GenerateMemoryProfile(&host_plane);
  ProcessMemoryProfileProto(max_num_snapshots, &memory_profile);
  return memory_profile;
}

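// If the XSpace has no host-threads plane, this returns OK and leaves
// json_output untouched.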
Status ConvertXSpaceToMemoryProfileJson(const XSpace& xspace,
                                        std::string* json_output) {
  if (const XPlane* host_plane =
          FindPlaneWithName(xspace, kHostThreadsPlaneName)) {
    MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
    TF_RETURN_IF_ERROR(ConvertProtoToJson(memory_profile, json_output));
  }
  return Status::OK();
}

}  // namespace profiler
}  // namespace tensorflow