1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ 17 #define TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ 18 19 #include <unordered_map> 20 #include <vector> 21 22 #include "tensorflow/core/framework/cost_graph.pb.h" 23 #include "tensorflow/core/framework/step_stats.pb.h" 24 #include "tensorflow/core/framework/tensor_shape.pb.h" 25 #include "tensorflow/core/graph/graph.h" 26 #include "tensorflow/core/graph/types.h" 27 #include "tensorflow/core/lib/core/stringpiece.h" 28 #include "tensorflow/core/lib/gtl/array_slice.h" 29 #include "tensorflow/core/platform/macros.h" 30 #include "tensorflow/core/platform/protobuf.h" 31 32 namespace tensorflow { 33 typedef std::unordered_map<StringPiece, int32, StringPieceHasher> 34 NodeNameToCostIdMap; 35 36 class StepStats; 37 38 // CostModel keeps track of the following runtime statistics for nodes 39 // of a single Graph: 40 // * The total number of times a node has executed. 41 // * The accumulated execution time (in microseconds) of a node. 42 // * The accumulated size (in bytes) of each node's output. 43 // 44 // This class is NOT thread-safe. 45 class CostModel { 46 public: 47 // If "global" is true, maintains costs based on Node::cost_id, otherwise 48 // maintains costs based on Node::id. CostModel(bool is_global)49 explicit CostModel(bool is_global) : is_global_(is_global) { 50 unknown_shape_.set_unknown_rank(true); 51 } 52 53 // Assigns min_count_ as a function of the median count for a Node. 54 // This value is then used for suppressing the time/size costs of 55 // infrequent operations. 56 // NOTE(tucker): Maybe this should move to a subclass of CostModel. 57 void SuppressInfrequent(); 58 is_global()59 bool is_global() const { return is_global_; } 60 Id(const Node * n)61 inline int Id(const Node* n) const { 62 if (is_global_) { 63 return n->cost_id(); 64 } else { 65 return n->id(); 66 } 67 } 68 69 // Initializes cost model for 'g'. 70 void InitFromGraph(const Graph& g); 71 72 // Merges costs from cm. 73 // REQUIRES: is_global_ is true for this and for "cm" 74 void MergeFromGlobal(const CostModel& cm); 75 76 // Merges costs from "cm", which has been computed relative to "g". 77 // REQUIRES: is_global_ is true for this, and false for "cm". 78 void MergeFromLocal(const Graph& g, const CostModel& cm); 79 80 void MergeFromStats(const NodeNameToCostIdMap& map, const StepStats& ss); 81 82 // Sets the number of outputs of "node". 83 void SetNumOutputs(const Node* node, int num_outputs); 84 85 // Records that "node" has executed "num_count" more times. 86 void RecordCount(const Node* node, int num_count); 87 88 // Returns how many times "node" has been executed. 89 int32 TotalCount(const Node* node) const; 90 91 // Records that "output_slot" of "node" has produced tensors of 92 // aggregated "bytes". 93 void RecordSize(const Node* node, int output_slot, Bytes bytes); 94 95 // Returns total bytes of tensors produced by "node"s output slot. 96 Bytes TotalBytes(const Node* node, int output_slot) const; 97 98 // Returns a prediction for the size of the tensor at the 99 // output_slot produced by one execution of "node". 100 Bytes SizeEstimate(const Node* node, int output_slot) const; 101 102 // Records that Executions of "node" have taken "time" microseconds. 103 void RecordTime(const Node* node, Microseconds time); 104 105 // Returns the total execution time for "node". 106 Microseconds TotalTime(const Node* node) const; 107 108 // Returns a prediction for one execution of "node". 109 Microseconds TimeEstimate(const Node* node) const; 110 111 // Check that an estimate is available for every OP node in graph. 112 void CheckInitialized(const Graph& graph) const; 113 114 // Records the maximum size in bytes and optionally the corresponding shape of 115 // the tensor generated by "output_slot" of "node". If 116 void RecordMaxMemorySize(const Node* node, int output_slot, Bytes bytes, 117 const TensorShapeProto& tensor_shape, 118 const DataType& dtype); 119 120 // Returns the maximum size in bytes of the tensor generated by "output_slot" 121 // of "node". 122 Bytes MaxMemorySize(const Node* node, int output_slot) const; 123 124 // Returns the shape corresponding to the largest memory size of the tensor 125 // generated by "output_slot" of "node". 126 const TensorShapeProto& MaxMemoryShape(const Node* node, 127 int output_slot) const; 128 129 // Returns the shape corresponding to the largest memory size of the tensor 130 // generated by "output_slot" of "node". 131 DataType MaxMemoryType(const Node* node, int output_slot) const; 132 133 // Returns the size in bytes of temporary memory consumed by "node". 134 Bytes TempMemorySize(const Node* node) const; 135 136 // Returns the size of persistent memory allocated by "node". 137 Bytes PersistentMemorySize(const Node* node) const; 138 139 // Records memory stats such as temp momory and persistent memory. 140 void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats); 141 142 // Records the maximum execution time (in microseconds) of "node". 143 void RecordMaxExecutionTime(const Node* node, Microseconds time); 144 145 // Returns the maximum execution time (in microseconds) of "node". 146 Microseconds MaxExecutionTime(const Node* node) const; 147 148 // Record the unique id of the tensor generated by "output_slot" of "node". 149 // Any other tensor sharing the same id will be an alias, i.e. it will share 150 // the same underlying memory storage area. 151 void RecordAllocationId(const Node* node, int output_slot, int64 alloc_id); 152 153 // Return the unique id of the tensor generated by "output_slot" of "node". 154 int64 AllocationId(const Node* node, int output_slot) const; 155 156 bool IsPersistentTensor(const Node* node, int64 alloc_id) const; 157 158 // Helper routines to encapsulate static estimation heuristics 159 160 // Compute an estimate of the time to copy "b" bytes over the network, 161 // given a fixed cost of "network_latency_millis" milliseconds and 162 // an estimated bandwidth of "estimated_gbps" gigabits per second (note that 163 // this value is in gigabits, not gigabytes). 164 static Microseconds CopyTimeEstimate(Bytes b, double network_latency_millis, 165 double estimated_gbps); 166 static Microseconds ComputationTimeEstimate(int64 mathops); 167 168 // Add this CostModel into the CostGraphDef. 169 void AddToCostGraphDef(const Graph* graph, CostGraphDef* cost_graph) const; 170 171 // Write the contents of the CostModel to the INFO log. 172 void WriteSummaryToLog() const; 173 174 // Increment the times that the cost model is updated. 175 void IncrementUpdateTimes(); 176 177 // Get the times that the cost model is updated. 178 int32 GetUpdateTimes() const; 179 180 private: 181 static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape, 182 const DataType& dtype); 183 184 const bool is_global_; 185 186 // Resizes vectors so that they are large enough for "id" and id's outputs. 187 void Ensure(int id, int num_outputs); 188 189 // Nodes and Edges whose count is < this value 190 // get type/byte estimates of 0. 191 int32 min_count_ = 0; 192 193 // The number of times the cost model is updated. 194 int32 update_times_ = 0; 195 196 // Number of times each Node has been executed. 197 std::vector<int32> count_; 198 // Cumulative execution time. 199 std::vector<Microseconds> time_; 200 // Cumulative Bytes output on each channel. 201 std::vector<gtl::InlinedVector<Bytes, 2>> slot_bytes_; 202 203 // Maximum execution time 204 std::vector<Microseconds> max_exec_time_; 205 206 // Maximum memory usage 207 struct MemUsage { MemUsageMemUsage208 MemUsage() : temp_memory_size(0), persistent_memory_size(0) {} 209 210 // TODO(yuefengz): temp_memory_size is not being used, remove it. 211 Bytes temp_memory_size; 212 Bytes persistent_memory_size; 213 214 gtl::InlinedVector<Bytes, 2> output_port_mem; 215 gtl::InlinedVector<TensorShapeProto, 2> output_port_shape; 216 gtl::InlinedVector<DataType, 2> output_port_type; 217 }; 218 std::vector<MemUsage> max_mem_usage_; 219 220 std::vector<gtl::InlinedVector<int64, 2>> output_port_alloc_ids_; 221 222 std::set<int64> persistent_alloc_ids_; 223 std::map<string, std::set<int64>> persistent_alloc_ids_by_devices_; 224 225 TensorShapeProto unknown_shape_; 226 227 TF_DISALLOW_COPY_AND_ASSIGN(CostModel); 228 }; 229 230 } // namespace tensorflow 231 232 #endif // TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ 233