1 /** 2 * Copyright 2019 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_ 18 #define PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_ 19 20 #include <map> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 #include "frontend/parallel/auto_parallel/costmodel.h" 26 #include "frontend/parallel/ops_info/operator_info.h" 27 #include "frontend/parallel/tensor_layout/tensor_info.h" 28 #include "frontend/parallel/tensor_layout/tensor_layout.h" 29 #include "utils/ms_utils.h" 30 31 namespace mindspore { 32 namespace parallel { 33 using CostPtrKey = std::pair<StrategyPtr, StrategyPtr>; 34 using OperatorInfoPtr = std::shared_ptr<mindspore::parallel::OperatorInfo>; 35 using EdgePtr = std::shared_ptr<mindspore::parallel::Edge>; 36 37 class Edge { 38 // An 'Edge' connects two Operators in the CostGraph. 39 public: Edge(const std::string & edge_name,const std::shared_ptr<OperatorInfo> & prev_op,const std::shared_ptr<OperatorInfo> & next_op,const size_t & output_index_,const size_t & input_index_,const bool & is_com)40 Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op, 41 const std::shared_ptr<OperatorInfo> &next_op, const size_t &output_index_, const size_t &input_index_, 42 const bool &is_com) 43 : edge_name_(edge_name), 44 prev_op_(prev_op), 45 next_op_(next_op), 46 prev_op_output_index_(output_index_), 47 next_op_input_index_(input_index_), 48 is_combined_(is_com) { 49 is_identity_edge = false; 50 } 51 Edge(const std::string & edge_name,const std::shared_ptr<OperatorInfo> & prev_op,const std::shared_ptr<OperatorInfo> & next_op,const size_t & output_index_,const size_t & input_index_,const bool & is_com,const bool & is_iden)52 Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op, 53 const std::shared_ptr<OperatorInfo> &next_op, const size_t &output_index_, const size_t &input_index_, 54 const bool &is_com, const bool &is_iden) 55 : edge_name_(edge_name), 56 prev_op_(prev_op), 57 next_op_(next_op), 58 prev_op_output_index_(output_index_), 59 next_op_input_index_(input_index_), 60 is_combined_(is_com), 61 is_identity_edge(is_iden) {} 62 Edge(const std::string & edge_name,const std::shared_ptr<OperatorInfo> & prev_op,const std::shared_ptr<OperatorInfo> & next_op,const std::vector<size_t> & output_indexs_,const std::vector<size_t> & input_indexs_,const bool & is_com)63 Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op, 64 const std::shared_ptr<OperatorInfo> &next_op, const std::vector<size_t> &output_indexs_, 65 const std::vector<size_t> &input_indexs_, const bool &is_com) 66 : edge_name_(edge_name), 67 prev_op_(prev_op), 68 next_op_(next_op), 69 pre_op_output_indexs_(output_indexs_), 70 next_op_input_indexs_(input_indexs_), 71 is_combined_(is_com) { 72 prev_op_output_index_ = 0; 73 next_op_input_index_ = 0; 74 is_identity_edge = false; 75 } 76 77 ~Edge() = default; prev_operator()78 std::shared_ptr<OperatorInfo> prev_operator() const { return prev_op_; } next_operator()79 std::shared_ptr<OperatorInfo> next_operator() const { return next_op_; } edge_name()80 std::string edge_name() const { return edge_name_; } 81 // Init cost_map_: for each output layout and input layout, calculate the cost 82 Status InitEdgeCost(); GetCostMap()83 std::map<CostPtrKey, CostPtrList> GetCostMap() { return cost_map_; } 84 CostPtr GetCostByStrategyPair(const CostPtrKey &); 85 StrategyPtr GetNextOpStrategyByPrevOpStrategyWithZeroComm(const StrategyPtr &); 86 StrategyPtr GetPrevOpStrategyByNextOpStrategyWithZeroComm(const StrategyPtr &); 87 void SetCostMapAndInputOutput(std::map<CostPtrKey, CostPtrList> &); 88 // For two operators u--->v, given the output tensor layout of u, 89 // and the input tensor layout of v, return the redistribution cost, 90 // and the op_list to carry out the redistribution. 91 Status GetRedistributionCost(const TensorLayout &prev_op_output_layout, const TensorLayout &next_op_input_layout, 92 size_t, const TypePtr &type, CostPtr *cost); 93 set_pre_op_output(const std::vector<std::pair<std::shared_ptr<Strategy>,std::vector<TensorInfo>>> & output_set)94 void set_pre_op_output(const std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> &output_set) { 95 pre_op_output_ = output_set; 96 } set_next_op_input(const std::vector<std::pair<std::shared_ptr<Strategy>,std::vector<TensorInfo>>> & input_set)97 void set_next_op_input(const std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> &input_set) { 98 next_op_input_ = input_set; 99 } 100 101 // Given a pair of output strategy and input strategy, return the corresponding costlist 102 CostPtrList GetCostList(StrategyPtr output_str, StrategyPtr input_str); 103 prev_op_output()104 std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> prev_op_output() const { 105 return pre_op_output_; 106 } next_op_input()107 std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> next_op_input() const { 108 return next_op_input_; 109 } 110 is_combined()111 bool is_combined() const { return is_combined_; } prev_op_output_index()112 size_t prev_op_output_index() const { return prev_op_output_index_; } next_op_input_index()113 size_t next_op_input_index() const { return next_op_input_index_; } prev_op_output_indexs()114 std::vector<size_t> prev_op_output_indexs() const { return pre_op_output_indexs_; } next_op_input_indexs()115 std::vector<size_t> next_op_input_indexs() const { return next_op_input_indexs_; } 116 117 CostPtrList CreateEdgeEliminationCostList(const StrategyPtr &output_st_ptr, 118 const std::vector<std::shared_ptr<Edge>> &edges, 119 const StrategyPtr &input_st_ptr); 120 // In the Edge Elimination operation in DP algorithm, 'edges' is replaced by a new edge. This method is used to 121 // set cost for this new edge 122 void EdgeEliminationSetNewCost(std::shared_ptr<OperatorInfo> u, const std::vector<std::shared_ptr<Edge>> &edges, 123 std::shared_ptr<OperatorInfo> v); 124 void CreateOpEliminationSubCostList(StrategyPtr op_strategy, const CostPtrList &left_cost_list, 125 const CostPtrList &middle_cost_list, const CostPtrList &right_cost_list, 126 CostPtrList *ret_cost_list); 127 128 CostPtrList CreateOpEliminationCostList(const std::shared_ptr<Edge> &e1, const StrategyPtr &output_st_ptr, 129 const std::shared_ptr<OperatorInfo> &op, const std::shared_ptr<Edge> &e2, 130 const StrategyPtr &input_st_ptr); 131 // In the Operation Elimination operation in DP algorithm, 'op', 'e1' and 'e2' are replaced by a new edge. 132 // This method is used to set cost for this new edge 133 void OpEliminationSetNewCost(const std::shared_ptr<Edge> &e1, const std::shared_ptr<OperatorInfo> &op, 134 const std::shared_ptr<Edge> &e2); 135 set_selected_cost(const CostPtr & cost)136 void set_selected_cost(const CostPtr &cost) { selected_cost_ = cost; } selected_cost()137 const CostPtr &selected_cost() const { return selected_cost_; } set_parameter_involve(int64_t para_invol)138 void set_parameter_involve(int64_t para_invol) { is_output_parameter_involve_ = para_invol; } 139 // In the training phase, when the input of a operator contains WEIGHT or a output from other operators involving 140 // WEIGHT, then these input should stay in memory until it is used in the backward phase, which is kept in memory 141 // at the end of forward phase. 142 Status CalculateMemoryCost(); 143 // In the inference phase, 144 Status CalculateMemoryCostForInference(); mark_output_critical()145 void mark_output_critical() { is_output_critical_ = 1; } 146 // Whether there exists any available strategy in 'cost_map_' 147 bool CheckStrategyCostPossibility() const; 148 149 private: 150 std::string edge_name_; 151 std::shared_ptr<OperatorInfo> prev_op_, next_op_; 152 std::map<CostPtrKey, CostPtrList> cost_map_; 153 // pre_op_output_ 154 std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> pre_op_output_; 155 std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> next_op_input_; 156 // the index of outputs of prev_op, and the index of inputs of next_op 157 size_t prev_op_output_index_, next_op_input_index_; 158 159 // pre_op_output_indexs_ and next_op_input_indexs_ store the indices of inputs and outputs if is_combined = true 160 std::vector<size_t> pre_op_output_indexs_; 161 std::vector<size_t> next_op_input_indexs_; 162 // is this edge constructed by combining multiple edges? If is is, then is_combined = true, else is_combined = false 163 bool is_combined_; 164 // When a Parameter in the ANF graph being used by multiple operators, we include the Parameter in the costgraph by 165 // replace the Parameter by a TmpIdentity operator, and connecting this TmpIdentity operator with subsequent 166 // operators. The resulting edges are different from those normal edges, thus this Bool variable distinguishes them. 167 // If it is true, then we should guarantee that the strategy for output tensor consistent with the input tensor. 168 bool is_identity_edge; 169 CostPtr selected_cost_; 170 // In the training phase, 'is_output_parameter_involve_' is used to mark whether the output of the previous operator 171 // is parameter-involved 172 int64_t is_output_parameter_involve_ = -1; // -1: unset; 0: not parameter_involved; 1: parameter_involved 173 // In the inference phase, this is used to mark whether the output of the previous operator is critical. 174 int64_t is_output_critical_ = 0; 175 }; 176 } // namespace parallel 177 } // namespace mindspore 178 #endif // PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_ 179