1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_GRAPPLER_COSTS_OP_LEVEL_COST_ESTIMATOR_H_ 17 #define TENSORFLOW_CORE_GRAPPLER_COSTS_OP_LEVEL_COST_ESTIMATOR_H_ 18 19 #include "tensorflow/core/grappler/costs/cost_estimator.h" 20 #include "tensorflow/core/grappler/costs/op_context.h" 21 #include "tensorflow/core/grappler/costs/op_performance_data.pb.h" 22 #include "tensorflow/core/util/padding.h" 23 24 namespace tensorflow { 25 namespace grappler { 26 27 bool GetTensorShapeProtoFromTensorProto(const TensorProto& tensor_proto, 28 TensorShapeProto* tensor_shape_proto); 29 TensorShapeProto MaybeGetMinimumShape(const TensorShapeProto& original_shape, 30 int rank, bool* found_unknown_shapes); 31 32 class OpLevelCostEstimator { 33 public: 34 OpLevelCostEstimator(); ~OpLevelCostEstimator()35 virtual ~OpLevelCostEstimator() {} 36 37 virtual Costs PredictCosts(const OpContext& op_context) const; 38 39 // Returns basic device performance info. 40 virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const; 41 42 protected: 43 // Predict cost of an op for which no accurate estimator is defined. 44 Costs PredictCostOfAnUnknownOp(const OpContext& op_context) const; 45 46 // Naive cost estimate based on the given operations count and total 47 // input/output tensor sizes of the given op_info combined. 48 Costs PredictOpCountBasedCost(double operations, const OpInfo& op_info) const; 49 50 // Naive cost estimate based on the given operations count and the given total 51 // io size in bytes. Sizes of op_info inputs and outputs are not taken into 52 // consideration. 53 Costs PredictOpCountBasedCost(double operations, double input_io_bytes, 54 double output_io_bytes, 55 const OpInfo& op_info) const; 56 57 // This family of routines counts the number of operations to perform the 58 // specified TensorFlow Op. 59 struct MatMulDimensions { 60 int m; 61 int n; 62 int k; 63 }; 64 struct ConvolutionDimensions { 65 int64 batch; // Batch size. 66 int64 ix; // Input size x. 67 int64 iy; // Input size y. 68 int64 iz; // Input depth. 69 int64 kx; // Kernel x. 70 int64 ky; // Kernel y. 71 int64 oz; // Output depth. 72 int64 ox; // Output size x. 73 int64 oy; // Output size y. 74 int64 sx; // Stride x. 75 int64 sy; // Stride y. 76 Padding padding; // SAME or VALID. 77 }; 78 int64 CountConv2DOperations(const OpInfo& op_info, 79 bool* found_unknown_shapes) const; 80 int64 CountConv2DOperations(const OpInfo& op_info, 81 ConvolutionDimensions* conv_info, 82 bool* found_unknown_shapes) const; 83 int64 CountMatMulOperations(const OpInfo& op_info, 84 bool* found_unknown_shapes) const; 85 int64 CountMatMulOperations(const OpInfo& op_info, MatMulDimensions* mat_mul, 86 bool* found_unknown_shapes) const; 87 int64 CountBatchMatMulOperations(const OpInfo& op_info, 88 bool* found_unknown_shapes) const; 89 int64 CountConv2DBackpropInputOperations( 90 const OpInfo& op_info, ConvolutionDimensions* returned_conv_dims, 91 bool* found_unknown_shapes) const; 92 int64 CountConv2DBackpropFilterOperations( 93 const OpInfo& op_info, ConvolutionDimensions* returned_conv_dims, 94 bool* found_unknown_shapes) const; 95 96 // Calculate the element count of an input/output tensor. 97 int64 CalculateTensorElementCount(const OpInfo::TensorProperties& tensor, 98 bool* found_unknown_shapes) const; 99 100 // Calculate the total size in bytes of an input/output tensor. 101 int64 CalculateTensorSize(const OpInfo::TensorProperties& tensor, 102 bool* found_unknown_shapes) const; 103 104 // Calculate the element count of the largest 105 // input of specified TensorFlow op. 106 int64 CalculateLargestInputCount(const OpInfo& op_info, 107 bool* found_unknown_shapes) const; 108 109 // Calculate the total size in bytes of the all 110 // the inputs of specified TensorFlow op. 111 int64 CalculateInputSize(const OpInfo& op_info, 112 bool* found_unknown_shapes) const; 113 114 // Calculate the total size in bytes of the all 115 // the outputs of specified TensorFlow op. 116 int64 CalculateOutputSize(const OpInfo& op_info, 117 bool* found_unknown_shapes) const; 118 119 // This family of routines predicts the costs to 120 // perform the specified TensorFlow Op on the 121 // device represented by a subclass. The default 122 // implementation just divides the operations to 123 // perform the op (from the "Count" routines, 124 // above) by the device peak operations per 125 // second. 126 // Implementation of costs other than 127 // execution_time is optional, depending on the 128 // device. 129 Costs PredictConv2D(const OpContext& op_context) const; 130 Costs PredictCwiseOp(const OpContext& op_context) const; 131 Costs PredictConv2DBackpropInput(const OpContext& op_context) const; 132 Costs PredictConv2DBackpropFilter(const OpContext& op_context) const; 133 Costs PredictFusedConv2DBiasActivation(const OpContext& op_context) const; 134 Costs PredictMatMul(const OpContext& op_context) const; 135 Costs PredictSparseTensorDenseMatMul(const OpContext& op_context) const; 136 Costs PredictNoOp(const OpContext& op_context) const; 137 Costs PredictIdentity(const OpContext& op_context) const; 138 Costs PredictVariable(const OpContext& op_context) const; 139 Costs PredictBatchMatMul(const OpContext& op_context) const; 140 Costs PredictMetadata(const OpContext& op_context) const; 141 Costs PredictGatherOrSlice(const OpContext& op_context) const; 142 Costs PredictMaxPool(const OpContext& op_context) const; 143 Costs PredictMaxPoolGrad(const OpContext& op_context) const; 144 Costs PredictAvgPool(const OpContext& op_context) const; 145 Costs PredictAvgPoolGrad(const OpContext& op_context) const; 146 Costs PredictFusedBatchNorm(const OpContext& op_context) const; 147 Costs PredictFusedBatchNormGrad(const OpContext& op_context) const; 148 149 // Generic cost prediction method for fused operations. 150 Costs PredictFusedOp(const OpContext& op_context, 151 const std::vector<OpContext>& fused_op_contexts) const; 152 153 // Utility function for safe division. Returns 0 154 // if rhs is 0 or negative. SafeDiv(const double lhs,const double rhs)155 static double SafeDiv(const double lhs, const double rhs) { 156 if (rhs > 0) { 157 return lhs / rhs; 158 } else { 159 return 0.0; 160 } 161 } 162 163 // For convolution and its grad ops. 164 static ConvolutionDimensions ConvolutionDimensionsFromInputs( 165 const TensorShapeProto& original_image_shape, 166 const TensorShapeProto& original_filter_shape, const OpInfo& op_info, 167 bool* found_unknown_shapes); 168 169 // For Pooling, FusedBatchNorm, and their grad ops. 170 static ConvolutionDimensions OpDimensionsFromInputs( 171 const TensorShapeProto& original_image_shape, const OpInfo& op_info, 172 bool* found_unknown_shapes); 173 174 // Helper to construct child operation contexts for the component operations 175 // of fused ops. 176 static OpContext FusedChildContext( 177 const OpContext& parent, const string& op_name, 178 const OpInfo::TensorProperties& output, 179 const std::vector<OpInfo::TensorProperties>& inputs); 180 181 // Helper to construct tensor shapes. 182 static OpInfo::TensorProperties DescribeTensor( 183 DataType type, const std::vector<int64>& dims); 184 185 // This method calculates the execution time depending on whether IO can 186 // overlap with computation. It assumes the memory and the compute times have 187 // already been calculated. 188 void CombineCostsAndUpdateExecutionTime(Costs* costs) const; 189 190 protected: 191 std::map<string, int> elementwise_ops_; 192 typedef std::function<Costs(const OpContext& op_context)> CostImpl; 193 std::map<string, CostImpl> device_cost_impl_; 194 // If true, assume compute and memory overlap; hence, the op cost is max of 195 // compute_time and memory_time, insteaf of sum of those two. 196 bool compute_memory_overlap_; 197 std::set<string> persistent_ops_; 198 199 private: 200 friend class OpLevelCostEstimatorTest; 201 }; 202 203 } // end namespace grappler 204 } // end namespace tensorflow 205 206 #endif // TENSORFLOW_CORE_GRAPPLER_COSTS_OP_LEVEL_COST_ESTIMATOR_H_ 207