• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/profiling/profile_summarizer.h"
17 
18 #include <memory>
19 #include <sstream>
20 
21 #include "tensorflow/lite/profiling/memory_info.h"
22 #include "tensorflow/lite/schema/schema_generated.h"
23 
24 namespace tflite {
25 namespace profiling {
26 namespace {
27 
// Descriptive data about a single operator node, collected so that profiling
// events can be labelled in the stats report.
struct OperatorDetails {
  // Index of the subgraph the node belongs to. Zero until explicitly set.
  uint32_t subgraph_index = 0;
  // Index of the node inside its subgraph. Zero until explicitly set.
  uint32_t node_index = 0;
  // Optional op-specific profiling description; empty when none is available.
  std::string op_description;
  // Names of the node's input tensors.
  std::vector<std::string> inputs;
  // Names of the node's output tensors.
  std::vector<std::string> outputs;
};
35 
GetTensorName(const tflite::Interpreter & interpreter,int tensor_index)36 std::string GetTensorName(const tflite::Interpreter& interpreter,
37                           int tensor_index) {
38   const auto tensor = interpreter.tensor(tensor_index);
39   if (tensor == nullptr || tensor->name == nullptr) {
40     return "Unknown";
41   }
42   return tensor->name;
43 }
GetTensorNames(const tflite::Interpreter & interpreter,const TfLiteIntArray * tensor_indices)44 std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
45                                         const TfLiteIntArray* tensor_indices) {
46   std::vector<std::string> tensors;
47   tensors.reserve(tensor_indices->size);
48   for (int i = 0; i < tensor_indices->size; i++) {
49     tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
50   }
51   return tensors;
52 }
53 
// Renders `str_vector` as a bracketed, comma-separated list, e.g. "[a, b]".
// An empty vector yields "[]".
std::string ToString(const std::vector<std::string>& str_vector) {
  std::string joined = "[";
  // Empty before the first element, ", " before every later one.
  const char* separator = "";
  for (const std::string& item : str_vector) {
    joined += separator;
    joined += item;
    separator = ", ";
  }
  joined += "]";
  return joined;
}
69 
GetOperatorDetails(const tflite::Interpreter & interpreter,uint32_t subgraph_index,uint32_t node_index)70 OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
71                                    uint32_t subgraph_index,
72                                    uint32_t node_index) {
73   auto subgraph =
74       const_cast<tflite::Interpreter&>(interpreter).subgraph(subgraph_index);
75   auto node_reg = subgraph->node_and_registration(node_index);
76   auto inputs = node_reg->first.inputs;
77   auto outputs = node_reg->first.outputs;
78   const char* profiling_string =
79       interpreter.OpProfilingString(node_reg->second, &node_reg->first);
80   OperatorDetails details;
81   if (profiling_string) {
82     details.op_description = std::string(profiling_string);
83   }
84   details.inputs = GetTensorNames(interpreter, inputs);
85   details.outputs = GetTensorNames(interpreter, outputs);
86   return details;
87 }
88 
89 }  // namespace
90 
ProfileSummarizer(std::shared_ptr<ProfileSummaryFormatter> summary_formatter)91 ProfileSummarizer::ProfileSummarizer(
92     std::shared_ptr<ProfileSummaryFormatter> summary_formatter)
93     : summary_formatter_(summary_formatter) {
94   // Create stats calculator for the primary graph.
95   stats_calculator_map_[0] = std::unique_ptr<tensorflow::StatsCalculator>(
96       new tensorflow::StatsCalculator(
97           summary_formatter_->GetStatSummarizerOptions()));
98 
99   // Create stats calculator for the delegation op.
100   delegate_stats_calculator_ = std::unique_ptr<tensorflow::StatsCalculator>(
101       new tensorflow::StatsCalculator(
102           summary_formatter_->GetStatSummarizerOptions()));
103 }
ProcessProfiles(const std::vector<const ProfileEvent * > & profile_stats,const tflite::Interpreter & interpreter)104 void ProfileSummarizer::ProcessProfiles(
105     const std::vector<const ProfileEvent*>& profile_stats,
106     const tflite::Interpreter& interpreter) {
107   if (profile_stats.empty()) return;
108 
109   std::vector<const ProfileEvent*> events;
110   std::copy_if(profile_stats.begin(), profile_stats.end(),
111                std::back_inserter(events), [](const ProfileEvent* e) {
112                  return e->end_timestamp_us >= e->begin_timestamp_us;
113                });
114   // Sort with begin_time.
115   std::sort(events.begin(), events.end(),
116             [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
117               return a->begin_timestamp_us < b->begin_timestamp_us;
118             });
119   if (events.empty()) {
120     return;
121   }
122 
123   int64_t base_start_us = events[0]->begin_timestamp_us;
124   int node_num = 0;
125 
126   // Total time will be accumulated per subgraph.
127   std::map<uint32_t, int64_t> total_us_per_subgraph_map;
128   int64_t delegate_internal_total_us = 0;
129 
130   for (auto event : events) {
131     const auto subgraph_index = event->extra_event_metadata;
132     auto stats_calculator = GetStatsCalculator(subgraph_index);
133     int64_t start_us = event->begin_timestamp_us - base_start_us;
134     int64_t node_exec_time =
135         event->end_timestamp_us - event->begin_timestamp_us;
136     if (event->event_type == Profiler::EventType::OPERATOR_INVOKE_EVENT) {
137       // When recording an OPERATOR_INVOKE_EVENT, we have recorded the node
138       // index as event_metadata. See the macro
139       // TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE defined in
140       // tensorflow/lite/core/api/profiler.h for details.
141       const auto node_index = event->event_metadata;
142 
143       const auto op_details =
144           GetOperatorDetails(interpreter, subgraph_index, node_index);
145       std::string type_in_stats(event->tag);
146       if (!op_details.op_description.empty()) {
147         type_in_stats += "/" + op_details.op_description;
148       }
149 
150       const auto node_name = ToString(op_details.outputs);
151       // Append node index to node name because 'stats_calculator' can not
152       // distinguish two nodes w/ the same 'node_name'.
153       const auto node_name_in_stats =
154           node_name + ":" + std::to_string(node_index);
155 
156       stats_calculator->AddNodeStats(node_name_in_stats, type_in_stats,
157                                      node_num, start_us, node_exec_time,
158                                      0 /*memory */);
159     } else if (event->event_type ==
160                Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
161       const std::string node_name(event->tag);
162       // Append event_metadata to node name because 'stats_calculator' can not
163       // distinguish two nodes w/ the same 'node_name'.
164       const auto node_name_in_stats =
165           "Delegate/" + node_name + ":" + std::to_string(event->event_metadata);
166 
167       delegate_stats_calculator_->AddNodeStats(
168           node_name_in_stats, "DelegateOpInvoke", node_num, start_us,
169           node_exec_time, 0 /*memory */);
170     } else {
171       // Note: a different stats_calculator could be used to record
172       // non-op-invoke events so that these could be separated from
173       // op-invoke-events in the final profiling stats report.
174       const memory::MemoryUsage node_mem_usage =
175           event->end_mem_usage - event->begin_mem_usage;
176       std::string node_name(event->tag);
177       if (node_name == "Invoke") {
178         // Don't count the overall Invoke for profiling.
179         continue;
180       }
181       node_name += "/" + std::to_string(event->extra_event_metadata);
182       stats_calculator->AddNodeStats(node_name, event->tag, node_num, start_us,
183                                      node_exec_time,
184                                      node_mem_usage.max_rss_kb * 1000.0);
185     }
186 
187     // Add total time except actual delegate ops since the elapsed time of the
188     // delegate ops inside are already combined at a fused DELEGATE op.
189     if (event->event_type !=
190         Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
191       total_us_per_subgraph_map[subgraph_index] += node_exec_time;
192     } else {
193       delegate_internal_total_us += node_exec_time;
194     }
195     ++node_num;
196   }
197 
198   for (auto& total_us_per_subgraph_pair : total_us_per_subgraph_map) {
199     auto stats_calculator =
200         GetStatsCalculator(total_us_per_subgraph_pair.first);
201     stats_calculator->UpdateRunTotalUs(total_us_per_subgraph_pair.second);
202   }
203   if (delegate_internal_total_us > 0) {
204     delegate_stats_calculator_->UpdateRunTotalUs(delegate_internal_total_us);
205   }
206 }
207 
GetStatsCalculator(uint32_t subgraph_index)208 tensorflow::StatsCalculator* ProfileSummarizer::GetStatsCalculator(
209     uint32_t subgraph_index) {
210   if (stats_calculator_map_.count(subgraph_index) == 0) {
211     stats_calculator_map_[subgraph_index] =
212         std::unique_ptr<tensorflow::StatsCalculator>(
213             new tensorflow::StatsCalculator(
214                 summary_formatter_->GetStatSummarizerOptions()));
215   }
216   return stats_calculator_map_[subgraph_index].get();
217 }
218 
219 }  // namespace profiling
220 }  // namespace tflite
221