/**
 * Copyright 2019-2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_TENSOR_LAYOUT_TENSOR_REDISTRIBUTION_H_
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_TENSOR_LAYOUT_TENSOR_REDISTRIBUTION_H_

#include <map>
#include <set>
#include <vector>
#include <utility>
#include <string>
#include "ir/value.h"
#include "frontend/parallel/status.h"
#include "frontend/parallel/tensor_layout/construct_operator.h"
#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h"
#include "frontend/parallel/tensor_layout/tensor_layout.h"

namespace mindspore {
namespace parallel {
constexpr double ALLTOALL_SCALE_FACTOR = 2.0;
constexpr double ALLGATHER_REDUCESCATTER_SCALE_FACTOR = 0.5;
using AssembledDynamicDimsMapping = std::map<int64_t, std::pair<size_t, AnfNodePtr>>;
using ReplacementMemo = std::map<size_t, int64_t>;
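
// Illustrative usage sketch, assuming the caller has already built the `from`
// and `to` TensorLayouts and the device RankList (a hedged example based only
// on the public API declared below; the canonical call sites live in the
// parallel passes):
//
//   TensorRedistribution tensor_redistribution;
//   if (tensor_redistribution.Init(from, to, dev_list) == SUCCESS) {
//     RedistributionOpListPtr op_list = tensor_redistribution.InferTensorRedistributionOperatorList();
//     (void)tensor_redistribution.ComputeCost();  // fills the comm/computation/memory cost members
//   }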
class TensorRedistribution {
 public:
  explicit TensorRedistribution(bool construct_op_flag = true, bool keep_reshape = false)
      : reshape_flag_(false),
        comm_cost_(0.0),
        forward_comm_cost_(0.0),
        backward_comm_cost_(0.0),
        computation_cost_(0.0),
        memory_cost_(0.0),
        construct_op_flag_(construct_op_flag),
        keep_reshape_(keep_reshape) {
    this->is_inited_ = false;
  }
  ~TensorRedistribution() = default;

  void SetPreAndNextCNode(const AnfNodePtr &pre_cnode, const CNodePtr &next_cnode) {
    this->pre_cnode_ = pre_cnode;
    this->next_cnode_ = next_cnode;
  }

  std::string PrintRedistribution() {
    return this->pre_cnode_->fullname_with_scope() + "->" + this->next_cnode_->fullname_with_scope();
  }

  void set_original_reshape_shape(const AnfNodePtr &original_reshape_shape) {
    this->original_reshape_shape_ = original_reshape_shape;
  }

  const AnfNodePtr original_reshape_shape() { return this->original_reshape_shape_; }
  bool is_dynamic_shape() { return this->is_dynamic_shape_; }
  Status Init(const TensorLayout &from, const TensorLayout &to, const RankList &dev_list);
  RedistributionOpListPtr InferTensorRedistributionOperatorList(bool is_cost_model = false);
  std::vector<RedistributionOpListPtr> InferTensorRedistributionOperatorVirtualGraphs();
  RedistributionOpListPtr InferTensorRedistributionOperatorListForMultiDynamicReshape(bool is_cost_model = false);
  OperatorList operator_list() const { return operator_list_; }
  bool reshape_flag() const { return reshape_flag_; }
  bool IsInited() const { return this->is_inited_; }
  Status ComputeCost();
  double comm_cost() const { return comm_cost_; }
  double computation_cost() const { return computation_cost_; }
  double forward_comm_cost() const { return forward_comm_cost_; }
  double backward_comm_cost() const { return backward_comm_cost_; }
  double memory_cost() const { return memory_cost_; }
  Shape input_shape() const { return from_origin_.slice_shape().array(); }
  Status ResetLayoutTransfer() { return this->RollbackToDynamicShape(); }
  Status RollbackToDynamicShape();
  TensorLayout from_origin_layout() const { return this->from_origin_; }
  TensorLayout from_layout() const { return this->from_; }
  TensorLayout assembled_static_origin_from() const { return this->assembled_static_origin_from_; }
  TensorLayout from_origin_no_assembled() const { return this->from_origin_no_assembled_; }
  TensorLayout to_origin_no_assembled() const { return this->to_origin_no_assembled_; }
  bool IsAssembledStaticShape() const { return this->is_assembled_static_shape_; }
  RedistributionLayoutTransfer layout_transfer() const { return this->layout_transfer_; }
  AssembledDynamicDimsMapping GetDynamicDimsMapping() const { return this->dynamic_dim_mapping_; }
  void CreateAssembledDynamicMapping(const CNodePtr &cur_cnode, const AnfNodePtr &pre_cnode,
                                     const FuncGraphPtr &func_graph, int64_t redistribution_index);
  void SetVirtualRank(const int64_t virtual_rank) { virtual_rank_ = virtual_rank; }
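
  // Informal note on the dynamic-shape path, inferred from the names below (the
  // authoritative behavior lives in the corresponding .cc file): when the layouts
  // carry dynamic dimensions, AssembleStaticTensorShape() substitutes placeholder
  // static dimensions, apparently tracked via the ReplacementMemo members and
  // dynamic_dim_mapping_, so that operator inference can run on a static shape;
  // RollbackToDynamicShape() restores the dynamic form afterwards.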
 private:
  Status CalculateToTensorShapeUsingEnumeration(const Shape &from_tsr_shape, Shape *to_tsr_shape, const Array &factors);
  Status CalculateToTensorShape(const Shape &from_shape, const Shape &origin_to_shape, const Array &to_in_factors,
                                Shape *to_shape);
  Status CalculateFromTensorShape(Shape *from_shape, const Array &from_factors, const Shape &to_shape,
                                  const Array &to_factors);
  Status AssembleStaticTensorShape(const TensorLayout &from_in, const TensorLayout &to_in,
                                   TensorLayout *new_from_layout, TensorLayout *new_to_layout);
  void UnifyAssembledMapping();
  void UnifyAssembledMappingWithSqueezedFromShape();
  void UnifyAssembledMappingWithSameSize(const std::set<int64_t> &index_mapping);
  void UnifyAssembledMappingWithDiffSize(const std::set<int64_t> &index_mapping);
  Status InferReshape(const TensorLayout &from_layout, const TensorLayout &to_layout,
                      OperatorVector *const operator_vector, OutPutInfoVector *const output_info_vector);
  Status InferRedistribution(const TensorLayout &from_layout, const TensorLayout &to_layout,
                             OperatorVector *const operator_vector, OutPutInfoVector *const output_info_vector,
                             bool is_cost_model);
  Status ComputeConcatCost(double input_size, const Shape &attrs);
  Status ComputePermuteCost(double input_size, const Shape &attrs);
  RedistributionOpListPtr InferTensorRedistributionOperatorListUnExpand(bool is_cost_model = false);
  Status MakeFromToLayout(const TensorLayout &from, const TensorLayout &to);
  Status OperatorListIsEmpty(ConstructOperator *constructor, OperatorVector *const operator_vector,
                             OutPutInfoVector *const output_info_vector);

  RedistributionLayoutTransfer layout_transfer_;
  AssembledDynamicDimsMapping dynamic_dim_mapping_;
  TensorLayout from_origin_no_assembled_;
  TensorLayout to_origin_no_assembled_;
  TensorLayout from_origin_;
  TensorLayout to_origin_;
  TensorLayout from_;
  TensorLayout to_;
  TensorLayout assembled_static_origin_from_;
  bool is_inited_;
  RankList dev_list_;
  OperatorList operator_list_;
  bool reshape_flag_;
  // Communication cost: the sum of the forward and backward communication costs.
  double comm_cost_;
  // Forward communication cost.
  double forward_comm_cost_;
  // Backward communication cost.
  double backward_comm_cost_;
  // computation_cost models the time spent on computation in this tensor redistribution, calculated from the
  // inputs. It is computed ONLY for the forward phase.
  double computation_cost_;
  // memory_cost models the PEAK memory cost contributed by this tensor redistribution in a training iteration,
  // calculated from the outputs.
  double memory_cost_;
  bool construct_op_flag_;
  bool keep_reshape_;
  bool expand_able_ = true;
  bool is_assembled_static_shape_ = false;
  bool is_dynamic_shape_ = false;
  ReplacementMemo from_dims_replace_memo_;
  ReplacementMemo to_dims_replace_memo_;
  AnfNodePtr pre_cnode_;
  CNodePtr next_cnode_;
  int64_t virtual_rank_ = -1;
  std::vector<int64_t> virtual_rank_list_;
  AnfNodePtr original_reshape_shape_ = nullptr;
};
}  // namespace parallel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_FRONTEND_PARALLEL_TENSOR_LAYOUT_TENSOR_REDISTRIBUTION_H_