1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ 17 #define TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ 18 19 #include <unordered_map> 20 #include <vector> 21 22 #include "tensorflow/core/framework/cost_graph.pb.h" 23 #include "tensorflow/core/framework/step_stats.pb.h" 24 #include "tensorflow/core/framework/tensor_shape.pb.h" 25 #include "tensorflow/core/graph/graph.h" 26 #include "tensorflow/core/graph/types.h" 27 #include "tensorflow/core/lib/core/stringpiece.h" 28 #include "tensorflow/core/lib/gtl/array_slice.h" 29 #include "tensorflow/core/platform/macros.h" 30 #include "tensorflow/core/platform/protobuf.h" 31 32 namespace tensorflow { 33 typedef std::unordered_map<StringPiece, int32, StringPieceHasher> 34 NodeNameToCostIdMap; 35 36 class StepStats; 37 38 // CostModel keeps track of the following runtime statistics for nodes 39 // of a single Graph: 40 // * The total number of times a node has executed. 41 // * The accumulated execution time (in microseconds) of a node. 42 // * The accumulated size (in bytes) of each node's output. 43 // 44 // This class is NOT thread-safe. 45 class CostModel { 46 public: 47 // If "global" is true, maintains costs based on Node::cost_id, otherwise 48 // maintains costs based on Node::id. CostModel(bool is_global)49 explicit CostModel(bool is_global) : is_global_(is_global) { 50 unknown_shape_.set_unknown_rank(true); 51 } 52 53 // Assigns min_count_ as a function of the median count for a Node. 54 // This value is then used for suppressing the time/size costs of 55 // infrequent operations. 56 // NOTE(tucker): Maybe this should move to a subclass of CostModel. 57 void SuppressInfrequent(); 58 is_global()59 bool is_global() const { return is_global_; } 60 Id(const Node * n)61 inline int Id(const Node* n) const { 62 if (is_global_) { 63 return n->cost_id(); 64 } else { 65 return n->id(); 66 } 67 } 68 GlobalId(const Node * n,int offset)69 inline int GlobalId(const Node* n, int offset) const { 70 if (is_global_) { 71 return n->cost_id(); 72 } else { 73 return n->id() + offset; 74 } 75 } 76 77 // Initializes cost model for 'g'. 78 void InitFromGraph(const Graph& g); 79 80 // Merges costs from cm. 81 // REQUIRES: is_global_ is true for this and for "cm" 82 void MergeFromGlobal(const CostModel& cm); 83 84 // Merges costs from "cm", which has been computed relative to "g". 85 // REQUIRES: is_global_ is true for this, and false for "cm". 86 void MergeFromLocal(const Graph& g, const CostModel& cm); 87 88 void MergeFromStats(const NodeNameToCostIdMap& map, const StepStats& ss); 89 90 // Sets the number of outputs of "node". 91 void SetNumOutputs(const Node* node, int num_outputs); 92 93 // Records that "node" has executed "num_count" more times. 94 void RecordCount(const Node* node, int num_count); 95 96 // Returns how many times "node" has been executed. 97 int32 TotalCount(const Node* node) const; 98 99 // Records that "output_slot" of "node" has produced tensors of 100 // aggregated "bytes". 101 void RecordSize(const Node* node, int output_slot, Bytes bytes); 102 103 // Returns total bytes of tensors produced by "node"s output slot. 104 Bytes TotalBytes(const Node* node, int output_slot) const; 105 106 // Returns a prediction for the size of the tensor at the 107 // output_slot produced by one execution of "node". 108 Bytes SizeEstimate(const Node* node, int output_slot) const; 109 110 // Records that Executions of "node" have taken "time" microseconds. 111 void RecordTime(const Node* node, Microseconds time); 112 113 // Returns the total execution time for "node". 114 Microseconds TotalTime(const Node* node) const; 115 116 // Returns a prediction for one execution of "node". 117 Microseconds TimeEstimate(const Node* node) const; 118 119 // Check that an estimate is available for every OP node in graph. 120 void CheckInitialized(const Graph& graph) const; 121 122 // Records the maximum size in bytes and optionally the corresponding shape of 123 // the tensor generated by "output_slot" of "node". If 124 void RecordMaxMemorySize(const Node* node, int output_slot, Bytes bytes, 125 const TensorShapeProto& tensor_shape, 126 const DataType& dtype); 127 128 // Returns the maximum size in bytes of the tensor generated by "output_slot" 129 // of "node". 130 Bytes MaxMemorySize(const Node* node, int output_slot) const; 131 132 // Returns the shape corresponding to the largest memory size of the tensor 133 // generated by "output_slot" of "node". 134 const TensorShapeProto& MaxMemoryShape(const Node* node, 135 int output_slot) const; 136 137 // Returns the shape corresponding to the largest memory size of the tensor 138 // generated by "output_slot" of "node". 139 DataType MaxMemoryType(const Node* node, int output_slot) const; 140 141 // Returns the size in bytes of temporary memory consumed by "node". 142 Bytes TempMemorySize(const Node* node) const; 143 144 // Returns the size of persistent memory allocated by "node". 145 Bytes PersistentMemorySize(const Node* node) const; 146 147 // Records memory stats such as temp momory and persistent memory. 148 void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats); 149 150 // Records the maximum execution time (in microseconds) of "node". 151 void RecordMaxExecutionTime(const Node* node, Microseconds time); 152 153 // Returns the maximum execution time (in microseconds) of "node". 154 Microseconds MaxExecutionTime(const Node* node) const; 155 156 // Record the unique id of the tensor generated by "output_slot" of "node". 157 // Any other tensor sharing the same id will be an alias, i.e. it will share 158 // the same underlying memory storage area. 159 void RecordAllocationId(const Node* node, int output_slot, int64 alloc_id); 160 161 // Return the unique id of the tensor generated by "output_slot" of "node". 162 int64 AllocationId(const Node* node, int output_slot) const; 163 164 bool IsPersistentTensor(const Node* node, int64 alloc_id) const; 165 166 // Helper routines to encapsulate static estimation heuristics 167 168 // Compute an estimate of the time to copy "b" bytes over the network, 169 // given a fixed cost of "network_latency_millis" milliseconds and 170 // an estimated bandwidth of "estimated_gbps" gigabits per second (note that 171 // this value is in gigabits, not gigabytes). 172 static Microseconds CopyTimeEstimate(Bytes b, double network_latency_millis, 173 double estimated_gbps); 174 static Microseconds ComputationTimeEstimate(int64 mathops); 175 176 // Add this CostModel into the CostGraphDef. 177 void AddToCostGraphDef(const Graph* graph, CostGraphDef* cost_graph) const; 178 179 // Write the contents of the CostModel to the INFO log. 180 void WriteSummaryToLog() const; 181 182 // Increment the times that the cost model is updated. 183 void IncrementUpdateTimes(); 184 185 // Get the times that the cost model is updated. 186 int32 GetUpdateTimes() const; 187 188 private: 189 static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape, 190 const DataType& dtype); 191 192 const bool is_global_; 193 194 // Resizes vectors so that they are large enough for "id" and id's outputs. 195 void Ensure(int id, int num_outputs); 196 197 // Nodes and Edges whose count is < this value 198 // get type/byte estimates of 0. 199 int32 min_count_ = 0; 200 201 // The number of times the cost model is updated. 202 int32 update_times_ = 0; 203 204 // Number of times each Node has been executed. 205 std::vector<int32> count_; 206 // Cumulative execution time. 207 std::vector<Microseconds> time_; 208 // Cumulative Bytes output on each channel. 209 std::vector<gtl::InlinedVector<Bytes, 2>> slot_bytes_; 210 211 // Maximum execution time 212 std::vector<Microseconds> max_exec_time_; 213 214 // Maximum memory usage 215 struct MemUsage { MemUsageMemUsage216 MemUsage() : temp_memory_size(0), persistent_memory_size(0) {} 217 218 // TODO(yuefengz): temp_memory_size is not being used, remove it. 219 Bytes temp_memory_size; 220 Bytes persistent_memory_size; 221 222 gtl::InlinedVector<Bytes, 2> output_port_mem; 223 gtl::InlinedVector<TensorShapeProto, 2> output_port_shape; 224 gtl::InlinedVector<DataType, 2> output_port_type; 225 }; 226 std::vector<MemUsage> max_mem_usage_; 227 228 std::vector<gtl::InlinedVector<int64, 2>> output_port_alloc_ids_; 229 230 std::set<int64> persistent_alloc_ids_; 231 std::map<string, std::set<int64>> persistent_alloc_ids_by_devices_; 232 233 TensorShapeProto unknown_shape_; 234 235 TF_DISALLOW_COPY_AND_ASSIGN(CostModel); 236 }; 237 238 } // namespace tensorflow 239 240 #endif // TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ 241