1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/profiling/profile_summarizer.h"
17
18 #include <memory>
19 #include <sstream>
20
21 #include "tensorflow/lite/profiling/memory_info.h"
22 #include "tensorflow/lite/schema/schema_generated.h"
23
24 namespace tflite {
25 namespace profiling {
26 namespace {
27
// Descriptive information about a single operator (graph node), used when
// building profiling statistics.
struct OperatorDetails {
  // Index of the subgraph the node belongs to. Zero-initialized so that a
  // default-constructed instance never carries an indeterminate value.
  uint32_t subgraph_index = 0;
  // Index of the node. Zero-initialized for the same reason as above.
  uint32_t node_index = 0;
  // Free-form description of the operator; empty when none is available.
  std::string op_description;
  // Names of the node's input tensors.
  std::vector<std::string> inputs;
  // Names of the node's output tensors.
  std::vector<std::string> outputs;
};
35
GetTensorName(const tflite::Interpreter & interpreter,int tensor_index)36 std::string GetTensorName(const tflite::Interpreter& interpreter,
37 int tensor_index) {
38 const auto tensor = interpreter.tensor(tensor_index);
39 if (tensor == nullptr || tensor->name == nullptr) {
40 return "Unknown";
41 }
42 return tensor->name;
43 }
GetTensorNames(const tflite::Interpreter & interpreter,const TfLiteIntArray * tensor_indices)44 std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
45 const TfLiteIntArray* tensor_indices) {
46 std::vector<std::string> tensors;
47 tensors.reserve(tensor_indices->size);
48 for (int i = 0; i < tensor_indices->size; i++) {
49 tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
50 }
51 return tensors;
52 }
53
// Formats `str_vector` as a bracketed, comma-separated list, e.g. "[a, b, c]".
// An empty vector is rendered as "[]".
std::string ToString(const std::vector<std::string>& str_vector) {
  std::stringstream out;
  out << "[";
  // The separator is empty before the first element and ", " afterwards, so
  // no leading or trailing delimiter is ever emitted.
  const char* separator = "";
  for (const auto& item : str_vector) {
    out << separator << item;
    separator = ", ";
  }
  out << "]";
  return out.str();
}
69
GetOperatorDetails(const tflite::Interpreter & interpreter,uint32_t subgraph_index,uint32_t node_index)70 OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
71 uint32_t subgraph_index,
72 uint32_t node_index) {
73 auto subgraph =
74 const_cast<tflite::Interpreter&>(interpreter).subgraph(subgraph_index);
75 auto node_reg = subgraph->node_and_registration(node_index);
76 auto inputs = node_reg->first.inputs;
77 auto outputs = node_reg->first.outputs;
78 const char* profiling_string =
79 interpreter.OpProfilingString(node_reg->second, &node_reg->first);
80 OperatorDetails details;
81 if (profiling_string) {
82 details.op_description = std::string(profiling_string);
83 }
84 details.inputs = GetTensorNames(interpreter, inputs);
85 details.outputs = GetTensorNames(interpreter, outputs);
86 return details;
87 }
88
89 } // namespace
90
ProfileSummarizer(std::shared_ptr<ProfileSummaryFormatter> summary_formatter)91 ProfileSummarizer::ProfileSummarizer(
92 std::shared_ptr<ProfileSummaryFormatter> summary_formatter)
93 : summary_formatter_(summary_formatter) {
94 // Create stats calculator for the primary graph.
95 stats_calculator_map_[0] = std::unique_ptr<tensorflow::StatsCalculator>(
96 new tensorflow::StatsCalculator(
97 summary_formatter_->GetStatSummarizerOptions()));
98
99 // Create stats calculator for the delegation op.
100 delegate_stats_calculator_ = std::unique_ptr<tensorflow::StatsCalculator>(
101 new tensorflow::StatsCalculator(
102 summary_formatter_->GetStatSummarizerOptions()));
103 }
ProcessProfiles(const std::vector<const ProfileEvent * > & profile_stats,const tflite::Interpreter & interpreter)104 void ProfileSummarizer::ProcessProfiles(
105 const std::vector<const ProfileEvent*>& profile_stats,
106 const tflite::Interpreter& interpreter) {
107 if (profile_stats.empty()) return;
108
109 std::vector<const ProfileEvent*> events;
110 std::copy_if(profile_stats.begin(), profile_stats.end(),
111 std::back_inserter(events), [](const ProfileEvent* e) {
112 return e->end_timestamp_us >= e->begin_timestamp_us;
113 });
114 // Sort with begin_time.
115 std::sort(events.begin(), events.end(),
116 [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
117 return a->begin_timestamp_us < b->begin_timestamp_us;
118 });
119 if (events.empty()) {
120 return;
121 }
122
123 int64_t base_start_us = events[0]->begin_timestamp_us;
124 int node_num = 0;
125
126 // Total time will be accumulated per subgraph.
127 std::map<uint32_t, int64_t> total_us_per_subgraph_map;
128 int64_t delegate_internal_total_us = 0;
129
130 for (auto event : events) {
131 const auto subgraph_index = event->extra_event_metadata;
132 auto stats_calculator = GetStatsCalculator(subgraph_index);
133 int64_t start_us = event->begin_timestamp_us - base_start_us;
134 int64_t node_exec_time =
135 event->end_timestamp_us - event->begin_timestamp_us;
136 if (event->event_type == Profiler::EventType::OPERATOR_INVOKE_EVENT) {
137 // When recording an OPERATOR_INVOKE_EVENT, we have recorded the node
138 // index as event_metadata. See the macro
139 // TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE defined in
140 // tensorflow/lite/core/api/profiler.h for details.
141 const auto node_index = event->event_metadata;
142
143 const auto op_details =
144 GetOperatorDetails(interpreter, subgraph_index, node_index);
145 std::string type_in_stats(event->tag);
146 if (!op_details.op_description.empty()) {
147 type_in_stats += "/" + op_details.op_description;
148 }
149
150 const auto node_name = ToString(op_details.outputs);
151 // Append node index to node name because 'stats_calculator' can not
152 // distinguish two nodes w/ the same 'node_name'.
153 const auto node_name_in_stats =
154 node_name + ":" + std::to_string(node_index);
155
156 stats_calculator->AddNodeStats(node_name_in_stats, type_in_stats,
157 node_num, start_us, node_exec_time,
158 0 /*memory */);
159 } else if (event->event_type ==
160 Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
161 const std::string node_name(event->tag);
162 // Append event_metadata to node name because 'stats_calculator' can not
163 // distinguish two nodes w/ the same 'node_name'.
164 const auto node_name_in_stats =
165 "Delegate/" + node_name + ":" + std::to_string(event->event_metadata);
166
167 delegate_stats_calculator_->AddNodeStats(
168 node_name_in_stats, "DelegateOpInvoke", node_num, start_us,
169 node_exec_time, 0 /*memory */);
170 } else {
171 // TODO(b/139812778) consider use a different stats_calculator to record
172 // non-op-invoke events so that these could be separated from
173 // op-invoke-events in the final profiling stats report.
174 const memory::MemoryUsage node_mem_usage =
175 event->end_mem_usage - event->begin_mem_usage;
176 std::string node_name(event->tag);
177 node_name += "/" + std::to_string(event->extra_event_metadata);
178 stats_calculator->AddNodeStats(node_name, event->tag, node_num, start_us,
179 node_exec_time,
180 node_mem_usage.max_rss_kb * 1000.0);
181 }
182
183 // Add total time except actual delegate ops since the elapsed time of the
184 // delegate ops inside are already combined at a fused DELEGATE op.
185 if (event->event_type !=
186 Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
187 total_us_per_subgraph_map[subgraph_index] += node_exec_time;
188 } else {
189 delegate_internal_total_us += node_exec_time;
190 }
191 ++node_num;
192 }
193
194 for (auto& total_us_per_subgraph_pair : total_us_per_subgraph_map) {
195 auto stats_calculator =
196 GetStatsCalculator(total_us_per_subgraph_pair.first);
197 stats_calculator->UpdateRunTotalUs(total_us_per_subgraph_pair.second);
198 }
199 if (delegate_internal_total_us > 0) {
200 delegate_stats_calculator_->UpdateRunTotalUs(delegate_internal_total_us);
201 }
202 }
203
GetStatsCalculator(uint32_t subgraph_index)204 tensorflow::StatsCalculator* ProfileSummarizer::GetStatsCalculator(
205 uint32_t subgraph_index) {
206 if (stats_calculator_map_.count(subgraph_index) == 0) {
207 stats_calculator_map_[subgraph_index] =
208 std::unique_ptr<tensorflow::StatsCalculator>(
209 new tensorflow::StatsCalculator(
210 summary_formatter_->GetStatSummarizerOptions()));
211 }
212 return stats_calculator_map_[subgraph_index].get();
213 }
214
215 } // namespace profiling
216 } // namespace tflite
217