1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
#include "tensorflow/lite/profiling/profile_summarizer.h"

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "tensorflow/lite/profiling/memory_info.h"
#include "tensorflow/lite/schema/schema_generated.h"
23
24 namespace tflite {
25 namespace profiling {
26 namespace {
27
// Describes a single operator (node) for profiling output.
struct OperatorDetails {
  // Index of the subgraph the node belongs to. Zero-initialized so that a
  // default-constructed instance never carries an indeterminate value.
  uint32_t subgraph_index = 0;
  // Index of the node. Zero-initialized for the same reason.
  uint32_t node_index = 0;
  // Profiling description of the op; empty when none is available.
  std::string op_description;
  // Names of the node's input tensors.
  std::vector<std::string> inputs;
  // Names of the node's output tensors.
  std::vector<std::string> outputs;
};
35
GetTensorName(const tflite::Interpreter & interpreter,int tensor_index)36 std::string GetTensorName(const tflite::Interpreter& interpreter,
37 int tensor_index) {
38 const auto tensor = interpreter.tensor(tensor_index);
39 if (tensor == nullptr || tensor->name == nullptr) {
40 return "Unknown";
41 }
42 return tensor->name;
43 }
GetTensorNames(const tflite::Interpreter & interpreter,const TfLiteIntArray * tensor_indices)44 std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
45 const TfLiteIntArray* tensor_indices) {
46 std::vector<std::string> tensors;
47 tensors.reserve(tensor_indices->size);
48 for (int i = 0; i < tensor_indices->size; i++) {
49 tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
50 }
51 return tensors;
52 }
53
// Formats `str_vector` as "[a, b, c]": elements joined by ", " and wrapped in
// square brackets. An empty vector yields "[]".
std::string ToString(const std::vector<std::string>& str_vector) {
  std::stringstream joined;
  joined << "[";
  // Empty before the first element, ", " before every subsequent one.
  const char* separator = "";
  for (const auto& item : str_vector) {
    joined << separator << item;
    separator = ", ";
  }
  joined << "]";
  return joined.str();
}
69
GetOperatorDetails(const tflite::Interpreter & interpreter,uint32_t subgraph_index,uint32_t node_index)70 OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
71 uint32_t subgraph_index,
72 uint32_t node_index) {
73 auto subgraph =
74 const_cast<tflite::Interpreter&>(interpreter).subgraph(subgraph_index);
75 auto node_reg = subgraph->node_and_registration(node_index);
76 auto inputs = node_reg->first.inputs;
77 auto outputs = node_reg->first.outputs;
78 const char* profiling_string =
79 interpreter.OpProfilingString(node_reg->second, &node_reg->first);
80 OperatorDetails details;
81 if (profiling_string) {
82 details.op_description = std::string(profiling_string);
83 }
84 details.inputs = GetTensorNames(interpreter, inputs);
85 details.outputs = GetTensorNames(interpreter, outputs);
86 return details;
87 }
88
89 } // namespace
90
ProfileSummarizer(std::shared_ptr<ProfileSummaryFormatter> summary_formatter)91 ProfileSummarizer::ProfileSummarizer(
92 std::shared_ptr<ProfileSummaryFormatter> summary_formatter)
93 : summary_formatter_(summary_formatter) {
94 // Create stats calculator for the primary graph.
95 stats_calculator_map_[0] = std::unique_ptr<tensorflow::StatsCalculator>(
96 new tensorflow::StatsCalculator(
97 summary_formatter_->GetStatSummarizerOptions()));
98
99 // Create stats calculator for the delegation op.
100 delegate_stats_calculator_ = std::unique_ptr<tensorflow::StatsCalculator>(
101 new tensorflow::StatsCalculator(
102 summary_formatter_->GetStatSummarizerOptions()));
103 }
ProcessProfiles(const std::vector<const ProfileEvent * > & profile_stats,const tflite::Interpreter & interpreter)104 void ProfileSummarizer::ProcessProfiles(
105 const std::vector<const ProfileEvent*>& profile_stats,
106 const tflite::Interpreter& interpreter) {
107 if (profile_stats.empty()) return;
108
109 std::vector<const ProfileEvent*> events;
110 std::copy_if(profile_stats.begin(), profile_stats.end(),
111 std::back_inserter(events), [](const ProfileEvent* e) {
112 return e->end_timestamp_us >= e->begin_timestamp_us;
113 });
114 // Sort with begin_time.
115 std::sort(events.begin(), events.end(),
116 [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
117 return a->begin_timestamp_us < b->begin_timestamp_us;
118 });
119 if (events.empty()) {
120 return;
121 }
122
123 int64_t base_start_us = events[0]->begin_timestamp_us;
124 int node_num = 0;
125
126 // Total time will be accumulated per subgraph.
127 std::map<uint32_t, int64_t> total_us_per_subgraph_map;
128 int64_t delegate_internal_total_us = 0;
129
130 for (auto event : events) {
131 const auto subgraph_index = event->extra_event_metadata;
132 auto stats_calculator = GetStatsCalculator(subgraph_index);
133 int64_t start_us = event->begin_timestamp_us - base_start_us;
134 int64_t node_exec_time =
135 event->end_timestamp_us - event->begin_timestamp_us;
136 if (event->event_type == Profiler::EventType::OPERATOR_INVOKE_EVENT) {
137 // When recording an OPERATOR_INVOKE_EVENT, we have recorded the node
138 // index as event_metadata. See the macro
139 // TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE defined in
140 // tensorflow/lite/core/api/profiler.h for details.
141 const auto node_index = event->event_metadata;
142
143 const auto op_details =
144 GetOperatorDetails(interpreter, subgraph_index, node_index);
145 std::string type_in_stats(event->tag);
146 if (!op_details.op_description.empty()) {
147 type_in_stats += "/" + op_details.op_description;
148 }
149
150 const auto node_name = ToString(op_details.outputs);
151 // Append node index to node name because 'stats_calculator' can not
152 // distinguish two nodes w/ the same 'node_name'.
153 const auto node_name_in_stats =
154 node_name + ":" + std::to_string(node_index);
155
156 stats_calculator->AddNodeStats(node_name_in_stats, type_in_stats,
157 node_num, start_us, node_exec_time,
158 0 /*memory */);
159 } else if (event->event_type ==
160 Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
161 const std::string node_name(event->tag);
162 // Append event_metadata to node name because 'stats_calculator' can not
163 // distinguish two nodes w/ the same 'node_name'.
164 const auto node_name_in_stats =
165 "Delegate/" + node_name + ":" + std::to_string(event->event_metadata);
166
167 delegate_stats_calculator_->AddNodeStats(
168 node_name_in_stats, "DelegateOpInvoke", node_num, start_us,
169 node_exec_time, 0 /*memory */);
170 } else {
171 // Note: a different stats_calculator could be used to record
172 // non-op-invoke events so that these could be separated from
173 // op-invoke-events in the final profiling stats report.
174 const memory::MemoryUsage node_mem_usage =
175 event->end_mem_usage - event->begin_mem_usage;
176 std::string node_name(event->tag);
177 if (node_name == "Invoke") {
178 // Don't count the overall Invoke for profiling.
179 continue;
180 }
181 node_name += "/" + std::to_string(event->extra_event_metadata);
182 stats_calculator->AddNodeStats(node_name, event->tag, node_num, start_us,
183 node_exec_time,
184 node_mem_usage.max_rss_kb * 1000.0);
185 }
186
187 // Add total time except actual delegate ops since the elapsed time of the
188 // delegate ops inside are already combined at a fused DELEGATE op.
189 if (event->event_type !=
190 Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
191 total_us_per_subgraph_map[subgraph_index] += node_exec_time;
192 } else {
193 delegate_internal_total_us += node_exec_time;
194 }
195 ++node_num;
196 }
197
198 for (auto& total_us_per_subgraph_pair : total_us_per_subgraph_map) {
199 auto stats_calculator =
200 GetStatsCalculator(total_us_per_subgraph_pair.first);
201 stats_calculator->UpdateRunTotalUs(total_us_per_subgraph_pair.second);
202 }
203 if (delegate_internal_total_us > 0) {
204 delegate_stats_calculator_->UpdateRunTotalUs(delegate_internal_total_us);
205 }
206 }
207
GetStatsCalculator(uint32_t subgraph_index)208 tensorflow::StatsCalculator* ProfileSummarizer::GetStatsCalculator(
209 uint32_t subgraph_index) {
210 if (stats_calculator_map_.count(subgraph_index) == 0) {
211 stats_calculator_map_[subgraph_index] =
212 std::unique_ptr<tensorflow::StatsCalculator>(
213 new tensorflow::StatsCalculator(
214 summary_formatter_->GetStatSummarizerOptions()));
215 }
216 return stats_calculator_map_[subgraph_index].get();
217 }
218
219 } // namespace profiling
220 } // namespace tflite
221