• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/profiling/profile_summarizer.h"
17 
18 #include <memory>
19 #include <sstream>
20 #include <string>
21 
22 #include "tensorflow/lite/profiling/memory_info.h"
23 #include "tensorflow/lite/schema/schema_generated.h"
24 
25 namespace tflite {
26 namespace profiling {
27 namespace {
28 
// Description of a single operator (node) gathered for the profiling report.
struct OperatorDetails {
  // Index of the subgraph the node belongs to. Zero-initialized so that a
  // default-constructed object never carries an indeterminate value.
  uint32_t subgraph_index = 0;
  // Index of the node within its subgraph. Zero-initialized for the same
  // reason as `subgraph_index`.
  uint32_t node_index = 0;
  // Profiling string reported for the op; empty when none is available.
  std::string op_description;
  // Names of the node's input tensors.
  std::vector<std::string> inputs;
  // Names of the node's output tensors.
  std::vector<std::string> outputs;
};
36 
GetTensorName(const tflite::Interpreter & interpreter,int tensor_index)37 std::string GetTensorName(const tflite::Interpreter& interpreter,
38                           int tensor_index) {
39   const auto tensor = interpreter.tensor(tensor_index);
40   if (tensor == nullptr || tensor->name == nullptr) {
41     return "Unknown";
42   }
43   return tensor->name;
44 }
GetTensorNames(const tflite::Interpreter & interpreter,const TfLiteIntArray * tensor_indices)45 std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
46                                         const TfLiteIntArray* tensor_indices) {
47   std::vector<std::string> tensors;
48   tensors.reserve(tensor_indices->size);
49   for (int i = 0; i < tensor_indices->size; i++) {
50     tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
51   }
52   return tensors;
53 }
54 
// Renders a list of strings as "[a, b, c]". An empty list yields "[]".
std::string ToString(const std::vector<std::string>& str_vector) {
  std::stringstream out;
  out << "[";
  // Nothing precedes the first element; every later one is prefixed by ", ".
  const char* separator = "";
  for (const std::string& item : str_vector) {
    out << separator << item;
    separator = ", ";
  }
  out << "]";
  return out.str();
}
70 
GetOperatorDetails(const tflite::Interpreter & interpreter,uint32_t subgraph_index,uint32_t node_index)71 OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
72                                    uint32_t subgraph_index,
73                                    uint32_t node_index) {
74   auto subgraph =
75       const_cast<tflite::Interpreter&>(interpreter).subgraph(subgraph_index);
76   auto node_reg = subgraph->node_and_registration(node_index);
77   auto inputs = node_reg->first.inputs;
78   auto outputs = node_reg->first.outputs;
79   const char* profiling_string =
80       interpreter.OpProfilingString(node_reg->second, &node_reg->first);
81   OperatorDetails details;
82   if (profiling_string) {
83     details.op_description = std::string(profiling_string);
84   }
85   details.inputs = GetTensorNames(interpreter, inputs);
86   details.outputs = GetTensorNames(interpreter, outputs);
87   return details;
88 }
89 
90 }  // namespace
91 
ProfileSummarizer(std::shared_ptr<ProfileSummaryFormatter> summary_formatter)92 ProfileSummarizer::ProfileSummarizer(
93     std::shared_ptr<ProfileSummaryFormatter> summary_formatter)
94     : summary_formatter_(summary_formatter) {
95   // Create stats calculator for the primary graph.
96   stats_calculator_map_[0] = std::make_unique<tensorflow::StatsCalculator>(
97 
98       summary_formatter_->GetStatSummarizerOptions());
99 
100   // Create stats calculator for the delegation op.
101   delegate_stats_calculator_ = std::make_unique<tensorflow::StatsCalculator>(
102 
103       summary_formatter_->GetStatSummarizerOptions());
104 }
ProcessProfiles(const std::vector<const ProfileEvent * > & profile_stats,const tflite::Interpreter & interpreter)105 void ProfileSummarizer::ProcessProfiles(
106     const std::vector<const ProfileEvent*>& profile_stats,
107     const tflite::Interpreter& interpreter) {
108   if (profile_stats.empty()) return;
109 
110   int node_num = 0;
111 
112   // Total time will be accumulated per subgraph.
113   std::map<uint32_t, int64_t> total_us_per_subgraph_map;
114   int64_t delegate_internal_total_us = 0;
115 
116   for (auto event : profile_stats) {
117     const auto subgraph_index = event->extra_event_metadata;
118     auto stats_calculator = GetStatsCalculator(subgraph_index);
119     int64_t node_exec_time = event->elapsed_time;
120     if (event->event_type == Profiler::EventType::OPERATOR_INVOKE_EVENT) {
121       // When recording an OPERATOR_INVOKE_EVENT, we have recorded the node
122       // index as event_metadata. See the macro
123       // TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE defined in
124       // tensorflow/lite/core/api/profiler.h for details.
125       const auto node_index = event->event_metadata;
126 
127       const auto op_details =
128           GetOperatorDetails(interpreter, subgraph_index, node_index);
129       std::string type_in_stats(event->tag);
130       if (!op_details.op_description.empty()) {
131         type_in_stats += "/" + op_details.op_description;
132       }
133 
134       const auto node_name = ToString(op_details.outputs);
135       // Append node index to node name because 'stats_calculator' can not
136       // distinguish two nodes w/ the same 'node_name'.
137       const auto node_name_in_stats =
138           node_name + ":" + std::to_string(node_index);
139 
140       stats_calculator->AddNodeStats(node_name_in_stats, type_in_stats,
141                                      node_num, node_exec_time, 0 /*memory */);
142     } else if (event->event_type ==
143                Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
144       const std::string node_name(event->tag);
145       // Append event_metadata to node name because 'stats_calculator' can not
146       // distinguish two nodes w/ the same 'node_name'.
147       const auto node_name_in_stats =
148           "Delegate/" + node_name + ":" + std::to_string(event->event_metadata);
149 
150       delegate_stats_calculator_->AddNodeStats(node_name_in_stats,
151                                                "DelegateOpInvoke", node_num,
152                                                node_exec_time, 0 /*memory */);
153     } else {
154       // Note: a different stats_calculator could be used to record
155       // non-op-invoke events so that these could be separated from
156       // op-invoke-events in the final profiling stats report.
157       const memory::MemoryUsage node_mem_usage =
158           event->end_mem_usage - event->begin_mem_usage;
159       std::string node_name(event->tag);
160       if (node_name == "Invoke") {
161         // Don't count the overall Invoke for profiling.
162         continue;
163       }
164       node_name += "/" + std::to_string(event->extra_event_metadata);
165       stats_calculator->AddNodeStats(node_name, event->tag, node_num,
166                                      node_exec_time,
167                                      node_mem_usage.max_rss_kb * 1000.0);
168     }
169 
170     // Add total time except actual delegate ops since the elapsed time of the
171     // delegate ops inside are already combined at a fused DELEGATE op.
172     if (event->event_type !=
173         Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT) {
174       total_us_per_subgraph_map[subgraph_index] += node_exec_time;
175     } else {
176       delegate_internal_total_us += node_exec_time;
177     }
178     ++node_num;
179   }
180 
181   for (auto& total_us_per_subgraph_pair : total_us_per_subgraph_map) {
182     auto stats_calculator =
183         GetStatsCalculator(total_us_per_subgraph_pair.first);
184     stats_calculator->UpdateRunTotalUs(total_us_per_subgraph_pair.second);
185   }
186   if (delegate_internal_total_us > 0) {
187     delegate_stats_calculator_->UpdateRunTotalUs(delegate_internal_total_us);
188   }
189 }
190 
GetStatsCalculator(uint32_t subgraph_index)191 tensorflow::StatsCalculator* ProfileSummarizer::GetStatsCalculator(
192     uint32_t subgraph_index) {
193   if (stats_calculator_map_.count(subgraph_index) == 0) {
194     stats_calculator_map_[subgraph_index] =
195         std::make_unique<tensorflow::StatsCalculator>(
196 
197             summary_formatter_->GetStatSummarizerOptions());
198   }
199   return stats_calculator_map_[subgraph_index].get();
200 }
201 
202 }  // namespace profiling
203 }  // namespace tflite
204