/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_GRAPPLER_COSTS_OP_LEVEL_COST_ESTIMATOR_H_
#define TENSORFLOW_CORE_GRAPPLER_COSTS_OP_LEVEL_COST_ESTIMATOR_H_

#include <functional>
#include <map>
#include <set>
#include <vector>

#include "tensorflow/core/grappler/costs/cost_estimator.h"
#include "tensorflow/core/grappler/costs/op_context.h"
#include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
#include "tensorflow/core/util/padding.h"

namespace tensorflow {
namespace grappler {

27 bool GetTensorShapeProtoFromTensorProto(const TensorProto& tensor_proto,
28                                         TensorShapeProto* tensor_shape_proto);
29 TensorShapeProto MaybeGetMinimumShape(const TensorShapeProto& original_shape,
30                                       int rank, bool* found_unknown_shapes);
31 
32 class OpLevelCostEstimator {
33  public:
34   OpLevelCostEstimator();
~OpLevelCostEstimator()35   virtual ~OpLevelCostEstimator() {}
36 
37   virtual Costs PredictCosts(const OpContext& op_context) const;
38 
39   // Returns basic device performance info.
40   virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const;
41 
42  protected:
43   // Predict cost of an op for which no accurate estimator is defined.
44   Costs PredictCostOfAnUnknownOp(const OpContext& op_context) const;
45 
46   // Naive cost estimate based on the given operations count and total
47   // input/output tensor sizes of the given op_info combined.
48   Costs PredictOpCountBasedCost(double operations, const OpInfo& op_info) const;
49 
50   // Naive cost estimate based on the given operations count and the given total
51   // io size in bytes. Sizes of op_info inputs and outputs are not taken into
52   // consideration.
53   Costs PredictOpCountBasedCost(double operations, double input_io_bytes,
54                                 double output_io_bytes,
55                                 const OpInfo& op_info) const;
56 
57   // This family of routines counts the number of operations to perform the
58   // specified TensorFlow Op.
59   struct MatMulDimensions {
60     int m;
61     int n;
62     int k;
63   };
64   struct ConvolutionDimensions {
65     int64 batch;      // Batch size.
66     int64 ix;         // Input size x.
67     int64 iy;         // Input size y.
68     int64 iz;         // Input depth.
69     int64 kx;         // Kernel x.
70     int64 ky;         // Kernel y.
71     int64 oz;         // Output depth.
72     int64 ox;         // Output size x.
73     int64 oy;         // Output size y.
74     int64 sx;         // Stride x.
75     int64 sy;         // Stride y.
76     Padding padding;  // SAME or VALID.
77   };
78   int64 CountConv2DOperations(const OpInfo& op_info,
79                               bool* found_unknown_shapes) const;
80   int64 CountConv2DOperations(const OpInfo& op_info,
81                               ConvolutionDimensions* conv_info,
82                               bool* found_unknown_shapes) const;
83   int64 CountMatMulOperations(const OpInfo& op_info,
84                               bool* found_unknown_shapes) const;
85   int64 CountMatMulOperations(const OpInfo& op_info, MatMulDimensions* mat_mul,
86                               bool* found_unknown_shapes) const;
87   int64 CountBatchMatMulOperations(const OpInfo& op_info,
88                                    bool* found_unknown_shapes) const;
89   int64 CountConv2DBackpropInputOperations(
90       const OpInfo& op_info, ConvolutionDimensions* returned_conv_dims,
91       bool* found_unknown_shapes) const;
92   int64 CountConv2DBackpropFilterOperations(
93       const OpInfo& op_info, ConvolutionDimensions* returned_conv_dims,
94       bool* found_unknown_shapes) const;
95 
96   // Calculate the element count of an input/output tensor.
97   int64 CalculateTensorElementCount(const OpInfo::TensorProperties& tensor,
98                                     bool* found_unknown_shapes) const;
99 
100   // Calculate the total size in bytes of an input/output tensor.
101   int64 CalculateTensorSize(const OpInfo::TensorProperties& tensor,
102                             bool* found_unknown_shapes) const;
103 
104   // Calculate the element count of the largest
105   // input of specified TensorFlow op.
106   int64 CalculateLargestInputCount(const OpInfo& op_info,
107                                    bool* found_unknown_shapes) const;
108 
109   // Calculate the total size in bytes of the all
110   // the inputs of specified TensorFlow op.
111   int64 CalculateInputSize(const OpInfo& op_info,
112                            bool* found_unknown_shapes) const;
113 
114   // Calculate the total size in bytes of the all
115   // the outputs of specified TensorFlow op.
116   int64 CalculateOutputSize(const OpInfo& op_info,
117                             bool* found_unknown_shapes) const;
118 
119   // This family of routines predicts the costs to
120   // perform the specified TensorFlow Op on the
121   // device represented by a subclass. The default
122   // implementation just divides the operations to
123   // perform the op (from the "Count" routines,
124   // above) by the device peak operations per
125   // second.
126   // Implementation of costs other than
127   // execution_time is optional, depending on the
128   // device.
129   Costs PredictConv2D(const OpContext& op_context) const;
130   Costs PredictCwiseOp(const OpContext& op_context) const;
131   Costs PredictConv2DBackpropInput(const OpContext& op_context) const;
132   Costs PredictConv2DBackpropFilter(const OpContext& op_context) const;
133   Costs PredictFusedConv2DBiasActivation(const OpContext& op_context) const;
134   Costs PredictMatMul(const OpContext& op_context) const;
135   Costs PredictSparseTensorDenseMatMul(const OpContext& op_context) const;
136   Costs PredictNoOp(const OpContext& op_context) const;
137   Costs PredictIdentity(const OpContext& op_context) const;
138   Costs PredictVariable(const OpContext& op_context) const;
139   Costs PredictBatchMatMul(const OpContext& op_context) const;
140   Costs PredictMetadata(const OpContext& op_context) const;
141   Costs PredictGatherOrSlice(const OpContext& op_context) const;
142   Costs PredictMaxPool(const OpContext& op_context) const;
143   Costs PredictMaxPoolGrad(const OpContext& op_context) const;
144   Costs PredictAvgPool(const OpContext& op_context) const;
145   Costs PredictAvgPoolGrad(const OpContext& op_context) const;
146   Costs PredictFusedBatchNorm(const OpContext& op_context) const;
147   Costs PredictFusedBatchNormGrad(const OpContext& op_context) const;
148 
149   // Generic cost prediction method for fused operations.
150   Costs PredictFusedOp(const OpContext& op_context,
151                        const std::vector<OpContext>& fused_op_contexts) const;
152 
153   // Utility function for safe division. Returns 0
154   // if rhs is 0 or negative.
SafeDiv(const double lhs,const double rhs)155   static double SafeDiv(const double lhs, const double rhs) {
156     if (rhs > 0) {
157       return lhs / rhs;
158     } else {
159       return 0.0;
160     }
161   }
162 
163   // For convolution and its grad ops.
164   static ConvolutionDimensions ConvolutionDimensionsFromInputs(
165       const TensorShapeProto& original_image_shape,
166       const TensorShapeProto& original_filter_shape, const OpInfo& op_info,
167       bool* found_unknown_shapes);
168 
169   // For Pooling, FusedBatchNorm, and their grad ops.
170   static ConvolutionDimensions OpDimensionsFromInputs(
171       const TensorShapeProto& original_image_shape, const OpInfo& op_info,
172       bool* found_unknown_shapes);
173 
174   // Helper to construct child operation contexts for the component operations
175   // of fused ops.
176   static OpContext FusedChildContext(
177       const OpContext& parent, const string& op_name,
178       const OpInfo::TensorProperties& output,
179       const std::vector<OpInfo::TensorProperties>& inputs);
180 
181   // Helper to construct tensor shapes.
182   static OpInfo::TensorProperties DescribeTensor(
183       DataType type, const std::vector<int64>& dims);
184 
185   // This method calculates the execution time depending on whether IO can
186   // overlap with computation. It assumes the memory and the compute times have
187   // already been calculated.
188   void CombineCostsAndUpdateExecutionTime(Costs* costs) const;
189 
190  protected:
191   std::map<string, int> elementwise_ops_;
192   typedef std::function<Costs(const OpContext& op_context)> CostImpl;
193   std::map<string, CostImpl> device_cost_impl_;
194   // If true, assume compute and memory overlap; hence, the op cost is max of
195   // compute_time and memory_time, insteaf of sum of those two.
196   bool compute_memory_overlap_;
197   std::set<string> persistent_ops_;
198 
199  private:
200   friend class OpLevelCostEstimatorTest;
201 };
202 
}  // end namespace grappler
}  // end namespace tensorflow

#endif  // TENSORFLOW_CORE_GRAPPLER_COSTS_OP_LEVEL_COST_ESTIMATOR_H_