1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ 17 #define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ 18 19 #include <stdlib.h> 20 21 #include <algorithm> 22 #include <cmath> 23 #include <limits> 24 #include <map> 25 #include <sstream> 26 #include <string> 27 #include <vector> 28 29 #include "tensorflow/core/util/stat_summarizer_options.h" 30 31 namespace tensorflow { 32 33 template <typename ValueType, typename HighPrecisionValueType = double> 34 class Stat { 35 public: UpdateStat(ValueType v)36 void UpdateStat(ValueType v) { 37 if (count_ == 0) { 38 first_ = v; 39 } 40 41 newest_ = v; 42 max_ = std::max(v, max_); 43 min_ = std::min(v, min_); 44 ++count_; 45 sum_ += v; 46 squared_sum_ += static_cast<HighPrecisionValueType>(v) * v; 47 } 48 Reset()49 void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); } 50 empty()51 bool empty() const { return count_ == 0; } 52 first()53 ValueType first() const { return first_; } 54 newest()55 ValueType newest() const { return newest_; } 56 max()57 ValueType max() const { return max_; } 58 min()59 ValueType min() const { return min_; } 60 count()61 int64_t count() const { return count_; } 62 sum()63 ValueType sum() const { return sum_; } 64 squared_sum()65 HighPrecisionValueType squared_sum() const { return squared_sum_; } 66 all_same()67 bool all_same() const { return (count_ == 0 || min_ == max_); } 68 avg()69 HighPrecisionValueType avg() const { 70 return empty() ? std::numeric_limits<ValueType>::quiet_NaN() 71 : static_cast<HighPrecisionValueType>(sum_) / count_; 72 } 73 74 // Returns sample variance. sample_variance()75 ValueType sample_variance() const { 76 return all_same() 77 ? 0 78 : (squared_sum_ - std::pow(sum_, 2.0) / count_) / (count_ - 1); 79 } 80 81 // Returns population variance. variance()82 ValueType variance() const { 83 return all_same() ? 0 : (squared_sum_ / count_) - (avg() * avg()); 84 } 85 86 // Returns population stddev. std_deviation()87 ValueType std_deviation() const { 88 return all_same() ? 0 : std::sqrt(variance()); 89 } 90 OutputToStream(std::ostream * stream)91 void OutputToStream(std::ostream* stream) const { 92 if (empty()) { 93 *stream << "count=0"; 94 } else if (all_same()) { 95 *stream << "count=" << count_ << " curr=" << newest_; 96 if (count_ > 1) *stream << "(all same)"; 97 } else { 98 *stream << "count=" << count_ << " first=" << first_ 99 << " curr=" << newest_ << " min=" << min_ << " max=" << max_ 100 << " avg=" << avg() << " std=" << std_deviation(); 101 } 102 } 103 104 friend std::ostream& operator<<(std::ostream& stream, 105 const Stat<ValueType>& stat) { 106 stat.OutputToStream(&stream); 107 return stream; 108 } 109 110 private: 111 ValueType first_ = 0; 112 ValueType newest_ = 0; 113 ValueType max_ = std::numeric_limits<ValueType>::min(); 114 ValueType min_ = std::numeric_limits<ValueType>::max(); 115 int64_t count_ = 0; 116 ValueType sum_ = 0; 117 HighPrecisionValueType squared_sum_ = 0; 118 }; 119 120 // A StatsCalculator assists in performance analysis of Graph executions. 121 // 122 // It summarizes time spent executing (on GPU/CPU), memory used etc for 123 // graph execution. 124 // 125 // For example usage see StatsSummarizer. 126 class StatsCalculator { 127 public: 128 enum SortingMetric { 129 BY_NAME, 130 BY_RUN_ORDER, 131 BY_TIME, 132 BY_MEMORY, 133 BY_TYPE, 134 }; 135 136 explicit StatsCalculator(const StatSummarizerOptions& options); 137 138 // Returns a string detailing the accumulated runtime stats in a tab-separated 139 // format which can be pasted into a spreadsheet for further analysis. 140 std::string GetOutputString() const; 141 142 std::string GetShortSummary() const; 143 144 void ComputeStatsByType( 145 std::map<std::string, int64_t>* node_type_map_count, 146 std::map<std::string, int64_t>* node_type_map_time, 147 std::map<std::string, int64_t>* node_type_map_memory, 148 std::map<std::string, int64_t>* node_type_map_times_called, 149 int64_t* accumulated_us) const; 150 151 std::string GetStatsByNodeType() const; 152 153 std::string GetStatsByMetric(const std::string& title, 154 SortingMetric sorting_metric, 155 int num_stats) const; 156 157 // Returns number of runs. num_runs()158 int num_runs() const { return static_cast<int>(run_total_us_.count()); } 159 160 // Returns stats of total microseconds spent by all nodes in each run. run_total_us()161 const Stat<int64_t>& run_total_us() const { return run_total_us_; } 162 UpdateRunTotalUs(int64_t run_total_us)163 void UpdateRunTotalUs(int64_t run_total_us) { 164 run_total_us_.UpdateStat(run_total_us); 165 } 166 UpdateMemoryUsed(int64_t memory)167 void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); } 168 169 struct Detail { 170 std::string name; 171 std::string type; 172 int64_t run_order; 173 Stat<int64_t> start_us; 174 Stat<int64_t> rel_end_us; 175 Stat<int64_t> mem_used; 176 int64_t times_called; 177 }; 178 GetDetails()179 const std::map<std::string, Detail>& GetDetails() const { return details_; } 180 181 void AddNodeStats(const std::string& name, const std::string& type, 182 int64_t run_order, int64_t start_us, int64_t rel_end_us, 183 int64_t mem_used); 184 185 private: 186 void OrderNodesByMetric(SortingMetric sorting_metric, 187 std::vector<const Detail*>* details) const; 188 189 std::string HeaderString(const std::string& title) const; 190 std::string ColumnString(const Detail& detail, 191 const int64_t cumulative_stat_on_node, 192 const Stat<int64_t>& stat) const; 193 194 Stat<int64_t> run_total_us_; 195 Stat<int64_t> memory_; 196 197 std::map<std::string, Detail> details_; 198 StatSummarizerOptions options_; 199 }; 200 201 } // namespace tensorflow 202 203 #endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ 204