1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ 17 #define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ 18 19 #include <stdlib.h> 20 21 #include <algorithm> 22 #include <cmath> 23 #include <limits> 24 #include <map> 25 #include <sstream> 26 #include <string> 27 #include <vector> 28 29 #include "tensorflow/core/util/stat_summarizer_options.h" 30 31 namespace tensorflow { 32 33 template <typename ValueType, typename HighPrecisionValueType = double> 34 class Stat { 35 public: UpdateStat(ValueType v)36 void UpdateStat(ValueType v) { 37 if (count_ == 0) { 38 first_ = v; 39 } 40 41 newest_ = v; 42 max_ = std::max(v, max_); 43 min_ = std::min(v, min_); 44 ++count_; 45 sum_ += v; 46 squared_sum_ += static_cast<HighPrecisionValueType>(v) * v; 47 } 48 Reset()49 void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); } 50 empty()51 bool empty() const { return count_ == 0; } 52 first()53 ValueType first() const { return first_; } 54 newest()55 ValueType newest() const { return newest_; } 56 max()57 ValueType max() const { return max_; } 58 min()59 ValueType min() const { return min_; } 60 count()61 int64_t count() const { return count_; } 62 sum()63 ValueType sum() const { return sum_; } 64 squared_sum()65 HighPrecisionValueType squared_sum() const { return squared_sum_; } 66 all_same()67 bool all_same() const { return (count_ == 0 || min_ == max_); } 68 avg()69 HighPrecisionValueType avg() const { 70 return empty() ? std::numeric_limits<ValueType>::quiet_NaN() 71 : static_cast<HighPrecisionValueType>(sum_) / count_; 72 } 73 std_deviation()74 ValueType std_deviation() const { 75 return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg()); 76 } 77 OutputToStream(std::ostream * stream)78 void OutputToStream(std::ostream* stream) const { 79 if (empty()) { 80 *stream << "count=0"; 81 } else if (all_same()) { 82 *stream << "count=" << count_ << " curr=" << newest_; 83 if (count_ > 1) *stream << "(all same)"; 84 } else { 85 *stream << "count=" << count_ << " first=" << first_ 86 << " curr=" << newest_ << " min=" << min_ << " max=" << max_ 87 << " avg=" << avg() << " std=" << std_deviation(); 88 } 89 } 90 91 friend std::ostream& operator<<(std::ostream& stream, 92 const Stat<ValueType>& stat) { 93 stat.OutputToStream(&stream); 94 return stream; 95 } 96 97 private: 98 ValueType first_ = 0; 99 ValueType newest_ = 0; 100 ValueType max_ = std::numeric_limits<ValueType>::min(); 101 ValueType min_ = std::numeric_limits<ValueType>::max(); 102 int64_t count_ = 0; 103 ValueType sum_ = 0; 104 HighPrecisionValueType squared_sum_ = 0; 105 }; 106 107 // A StatsCalculator assists in performance analysis of Graph executions. 108 // 109 // It summarizes time spent executing (on GPU/CPU), memory used etc for 110 // graph execution. 111 // 112 // For example usage see StatsSummarizer. 113 class StatsCalculator { 114 public: 115 enum SortingMetric { 116 BY_NAME, 117 BY_RUN_ORDER, 118 BY_TIME, 119 BY_MEMORY, 120 BY_TYPE, 121 }; 122 123 explicit StatsCalculator(const StatSummarizerOptions& options); 124 125 // Returns a string detailing the accumulated runtime stats in a tab-separated 126 // format which can be pasted into a spreadsheet for further analysis. 127 std::string GetOutputString() const; 128 129 std::string GetShortSummary() const; 130 131 void ComputeStatsByType( 132 std::map<std::string, int64_t>* node_type_map_count, 133 std::map<std::string, int64_t>* node_type_map_time, 134 std::map<std::string, int64_t>* node_type_map_memory, 135 std::map<std::string, int64_t>* node_type_map_times_called, 136 int64_t* accumulated_us) const; 137 138 std::string GetStatsByNodeType() const; 139 140 std::string GetStatsByMetric(const std::string& title, 141 SortingMetric sorting_metric, 142 int num_stats) const; 143 144 // Returns number of runs. num_runs()145 int num_runs() const { return static_cast<int>(run_total_us_.count()); } 146 147 // Returns stats of total microseconds spent by all nodes in each run. run_total_us()148 const Stat<int64_t>& run_total_us() const { return run_total_us_; } 149 UpdateRunTotalUs(int64_t run_total_us)150 void UpdateRunTotalUs(int64_t run_total_us) { 151 run_total_us_.UpdateStat(run_total_us); 152 } 153 UpdateMemoryUsed(int64_t memory)154 void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); } 155 156 struct Detail { 157 std::string name; 158 std::string type; 159 int64_t run_order; 160 Stat<int64_t> start_us; 161 Stat<int64_t> rel_end_us; 162 Stat<int64_t> mem_used; 163 int64_t times_called; 164 }; 165 GetDetails()166 const std::map<std::string, Detail>& GetDetails() const { return details_; } 167 168 void AddNodeStats(const std::string& name, const std::string& type, 169 int64_t run_order, int64_t start_us, int64_t rel_end_us, 170 int64_t mem_used); 171 172 private: 173 void OrderNodesByMetric(SortingMetric sorting_metric, 174 std::vector<const Detail*>* details) const; 175 176 std::string HeaderString(const std::string& title) const; 177 std::string ColumnString(const Detail& detail, 178 const int64_t cumulative_stat_on_node, 179 const Stat<int64_t>& stat) const; 180 181 Stat<int64_t> run_total_us_; 182 Stat<int64_t> memory_; 183 184 std::map<std::string, Detail> details_; 185 StatSummarizerOptions options_; 186 }; 187 188 } // namespace tensorflow 189 190 #endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ 191