1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/util/stats_calculator.h"
17
18 #include <iomanip>
19 #include <map>
20 #include <queue>
21 #include <sstream>
22 #include <string>
23
24 namespace tensorflow {
25
StatsCalculator(const StatSummarizerOptions & options)26 StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
27 : options_(options) {}
28
GetShortSummary() const29 std::string StatsCalculator::GetShortSummary() const {
30 std::stringstream stream;
31 stream << "Timings (microseconds): ";
32 run_total_us_.OutputToStream(&stream);
33 stream << std::endl;
34
35 stream << "Memory (bytes): ";
36 memory_.OutputToStream(&stream);
37 stream << std::endl;
38
39 stream << details_.size() << " nodes observed" << std::endl;
40 return stream.str();
41 }
42
// Starts a new table cell on `stream`: writes a tab separator, then arms the
// stream so that the next inserted value is right-aligned in a field of
// `width` characters. Floating-point values are printed in fixed notation
// with three digits after the decimal point. The alignment, notation and
// precision settings persist on the stream; the width applies only to the
// next formatted insertion. Returns `stream` so a value can be chained.
std::ostream& InitField(std::ostream& stream, int width) {
  stream << "\t";
  stream.setf(std::ios_base::right, std::ios_base::adjustfield);
  stream.width(width);
  stream.setf(std::ios_base::fixed, std::ios_base::floatfield);
  stream.precision(3);
  return stream;
}
48
HeaderString(const std::string & title) const49 std::string StatsCalculator::HeaderString(const std::string& title) const {
50 std::stringstream stream;
51
52 stream << "============================== " << title
53 << " ==============================" << std::endl;
54
55 InitField(stream, 24) << "[node type]";
56 InitField(stream, 17) << "[start]";
57 InitField(stream, 9) << "[first]";
58 InitField(stream, 9) << "[avg ms]";
59 InitField(stream, 8) << "[%]";
60 InitField(stream, 8) << "[cdf%]";
61 InitField(stream, 10) << "[mem KB]";
62 InitField(stream, 9) << "[times called]";
63 stream << "\t"
64 << "[Name]";
65 return stream.str();
66 }
67
ColumnString(const Detail & detail,const int64_t cumulative_stat_on_node,const Stat<int64_t> & stat) const68 std::string StatsCalculator::ColumnString(const Detail& detail,
69 const int64_t cumulative_stat_on_node,
70 const Stat<int64_t>& stat) const {
71 const double start_ms = detail.start_us.avg() / 1000.0;
72 const double first_time_ms = detail.rel_end_us.first() / 1000.0;
73 const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
74 const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
75 const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
76 const int64_t times_called = detail.times_called / num_runs();
77
78 std::stringstream stream;
79 InitField(stream, 24) << detail.type;
80 InitField(stream, 17) << start_ms;
81 InitField(stream, 9) << first_time_ms;
82 InitField(stream, 9) << avg_time_ms;
83 InitField(stream, 7) << percentage << "%";
84 InitField(stream, 7) << cdf_percentage << "%";
85 InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
86 InitField(stream, 9) << times_called;
87 stream << "\t" << detail.name;
88
89 return stream.str();
90 }
91
OrderNodesByMetric(SortingMetric metric,std::vector<const Detail * > * details) const92 void StatsCalculator::OrderNodesByMetric(
93 SortingMetric metric, std::vector<const Detail*>* details) const {
94 std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
95 const int num_nodes = details_.size();
96
97 for (const auto& det : details_) {
98 const Detail* detail = &(det.second);
99 std::stringstream stream;
100 stream << std::setw(20) << std::right << std::setprecision(10)
101 << std::fixed;
102
103 switch (metric) {
104 case BY_NAME:
105 stream << detail->name;
106 break;
107 case BY_RUN_ORDER:
108 stream << num_nodes - detail->run_order;
109 break;
110 case BY_TIME:
111 stream << detail->rel_end_us.avg();
112 break;
113 case BY_MEMORY:
114 stream << detail->mem_used.avg();
115 break;
116 case BY_TYPE:
117 stream << detail->type;
118 break;
119 default:
120 stream << "";
121 break;
122 }
123
124 sorted_list.emplace(stream.str(), detail);
125 }
126
127 while (!sorted_list.empty()) {
128 auto entry = sorted_list.top();
129 sorted_list.pop();
130 details->push_back(entry.second);
131 }
132 }
133
ComputeStatsByType(std::map<std::string,int64_t> * node_type_map_count,std::map<std::string,int64_t> * node_type_map_time,std::map<std::string,int64_t> * node_type_map_memory,std::map<std::string,int64_t> * node_type_map_times_called,int64_t * accumulated_us) const134 void StatsCalculator::ComputeStatsByType(
135 std::map<std::string, int64_t>* node_type_map_count,
136 std::map<std::string, int64_t>* node_type_map_time,
137 std::map<std::string, int64_t>* node_type_map_memory,
138 std::map<std::string, int64_t>* node_type_map_times_called,
139 int64_t* accumulated_us) const {
140 int64_t run_count = run_total_us_.count();
141
142 for (const auto& det : details_) {
143 const std::string node_name = det.first;
144 const Detail& detail = det.second;
145
146 int64_t curr_time_val =
147 static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
148 *accumulated_us += curr_time_val;
149
150 int64_t curr_memory_val = detail.mem_used.newest();
151
152 const std::string& node_type = detail.type;
153
154 (*node_type_map_count)[node_type] += 1;
155 (*node_type_map_time)[node_type] += curr_time_val;
156 (*node_type_map_memory)[node_type] += curr_memory_val;
157 (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
158 }
159 }
160
GetStatsByNodeType() const161 std::string StatsCalculator::GetStatsByNodeType() const {
162 std::stringstream stream;
163
164 stream << "Number of nodes executed: " << details_.size() << std::endl;
165
166 stream << "============================== Summary by node type "
167 "=============================="
168 << std::endl;
169
170 std::map<std::string, int64_t> node_type_map_count;
171 std::map<std::string, int64_t> node_type_map_time;
172 std::map<std::string, int64_t> node_type_map_memory;
173 std::map<std::string, int64_t> node_type_map_times_called;
174 int64_t accumulated_us = 0;
175
176 ComputeStatsByType(&node_type_map_count, &node_type_map_time,
177 &node_type_map_memory, &node_type_map_times_called,
178 &accumulated_us);
179
180 // Sort them.
181 std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
182 timings;
183 for (const auto& node_type : node_type_map_time) {
184 const int64_t mem_used = node_type_map_memory[node_type.first];
185 timings.emplace(node_type.second,
186 std::pair<std::string, int64_t>(node_type.first, mem_used));
187 }
188
189 InitField(stream, 24) << "[Node type]";
190 InitField(stream, 9) << "[count]";
191 InitField(stream, 10) << "[avg ms]";
192 InitField(stream, 11) << "[avg %]";
193 InitField(stream, 11) << "[cdf %]";
194 InitField(stream, 10) << "[mem KB]";
195 InitField(stream, 10) << "[times called]";
196 stream << std::endl;
197
198 float cdf = 0.0f;
199 while (!timings.empty()) {
200 auto entry = timings.top();
201 timings.pop();
202
203 const std::string node_type = entry.second.first;
204 const float memory = entry.second.second / 1000.0f;
205
206 const int64_t node_type_total_us = entry.first;
207 const float time_per_run_ms = node_type_total_us / 1000.0f;
208
209 const float percentage =
210 ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
211 cdf += percentage;
212
213 InitField(stream, 24) << node_type;
214 InitField(stream, 9) << node_type_map_count[node_type];
215 InitField(stream, 10) << time_per_run_ms;
216 InitField(stream, 10) << percentage << "%";
217 InitField(stream, 10) << cdf << "%";
218 InitField(stream, 10) << memory;
219 InitField(stream, 9) << node_type_map_times_called[node_type];
220 stream << std::endl;
221 }
222 stream << std::endl;
223 return stream.str();
224 }
225
GetStatsByMetric(const std::string & title,SortingMetric sorting_metric,int num_stats) const226 std::string StatsCalculator::GetStatsByMetric(const std::string& title,
227 SortingMetric sorting_metric,
228 int num_stats) const {
229 std::vector<const Detail*> details;
230 OrderNodesByMetric(sorting_metric, &details);
231
232 double cumulative_stat_on_node = 0;
233
234 std::stringstream stream;
235 stream << HeaderString(title) << std::endl;
236 int stat_num = 0;
237 for (auto detail : details) {
238 ++stat_num;
239 if (num_stats > 0 && stat_num > num_stats) {
240 break;
241 }
242
243 // TODO(andrewharp): Make this keep track of the particular metric for cdf.
244 cumulative_stat_on_node += detail->rel_end_us.sum();
245 stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
246 << std::endl;
247 }
248 stream << std::endl;
249 return stream.str();
250 }
251
GetOutputString() const252 std::string StatsCalculator::GetOutputString() const {
253 std::stringstream stream;
254 if (options_.show_run_order) {
255 stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
256 options_.run_order_limit);
257 }
258 if (options_.show_time) {
259 stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
260 options_.time_limit);
261 }
262 if (options_.show_memory) {
263 stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
264 options_.memory_limit);
265 }
266 if (options_.show_type) {
267 stream << GetStatsByNodeType();
268 }
269 if (options_.show_summary) {
270 stream << GetShortSummary() << std::endl;
271 }
272 return stream.str();
273 }
274
AddNodeStats(const std::string & name,const std::string & type,int64_t run_order,int64_t start_us,int64_t rel_end_us,int64_t mem_used)275 void StatsCalculator::AddNodeStats(const std::string& name,
276 const std::string& type, int64_t run_order,
277 int64_t start_us, int64_t rel_end_us,
278 int64_t mem_used) {
279 Detail* detail = nullptr;
280 if (details_.find(name) == details_.end()) {
281 details_.insert({name, {}});
282 detail = &details_.at(name);
283 detail->type = type;
284 detail->name = name;
285 detail->run_order = run_order;
286 } else {
287 detail = &details_.at(name);
288 }
289 detail->start_us.UpdateStat(start_us);
290 detail->rel_end_us.UpdateStat(rel_end_us);
291 detail->mem_used.UpdateStat(mem_used);
292 detail->times_called++;
293 }
294
295 } // namespace tensorflow
296