/**
 * Copyright 2019-2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_TENSOR_LAYOUT_TENSOR_REDISTRIBUTION_H_
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_TENSOR_LAYOUT_TENSOR_REDISTRIBUTION_H_

#include <map>
#include <set>
#include <vector>
#include <utility>
#include <string>
#include "ir/value.h"
#include "frontend/parallel/status.h"
#include "frontend/parallel/tensor_layout/construct_operator.h"
#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h"
#include "frontend/parallel/tensor_layout/tensor_layout.h"

namespace mindspore {
namespace parallel {
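// Assumed semantics (not confirmed by this header): empirical factors used by the cost model
// to weight AllToAll against AllGather/ReduceScatter based redistribution when estimating
// communication cost.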
constexpr double ALLTOALL_SCALE_FACTOR = 2.0;
constexpr double ALLGATHER_REDUCESCATTER_SCALE_FACTOR = 0.5;
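// Assumed semantics: AssembledDynamicDimsMapping maps an assembled placeholder dim value to
// (shape-dim index, ANF node holding the real dynamic dim); ReplacementMemo records which
// dim index was replaced by which static value.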
using AssembledDynamicDimsMapping = std::map<int64_t, std::pair<size_t, AnfNodePtr>>;
using ReplacementMemo = std::map<size_t, int64_t>;

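// TensorRedistribution infers the operator sequence (e.g. Reshape, AllGather, AllToAll)
// required to transform a tensor from one TensorLayout to another across a device list,
// and models the communication/computation/memory costs of that sequence.
//
// A minimal usage sketch (assuming `from_layout`, `to_layout` and `dev_list` are valid):
//   TensorRedistribution tensor_redistribution;
//   if (tensor_redistribution.Init(from_layout, to_layout, dev_list) == SUCCESS) {
//     RedistributionOpListPtr op_list = tensor_redistribution.InferTensorRedistributionOperatorList();
//     (void)tensor_redistribution.ComputeCost();
//   }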
class TensorRedistribution {
 public:
  explicit TensorRedistribution(bool construct_op_flag = true, bool keep_reshape = false)
      : reshape_flag_(false),
        comm_cost_(0.0),
        forward_comm_cost_(0.0),
        backward_comm_cost_(0.0),
        computation_cost_(0.0),
        memory_cost_(0.0),
        construct_op_flag_(construct_op_flag),
        keep_reshape_(keep_reshape) {
    this->is_inited_ = false;
  }
  ~TensorRedistribution() = default;

  void SetPreAndNextCNode(const AnfNodePtr &pre_cnode, const CNodePtr &next_cnode) {
    this->pre_cnode_ = pre_cnode;
    this->next_cnode_ = next_cnode;
  }

  std::string PrintRedistribution() {
    return this->pre_cnode_->fullname_with_scope() + "->" + this->next_cnode_->fullname_with_scope();
  }

  void set_original_reshape_shape(const AnfNodePtr &original_reshape_shape) {
    this->original_reshape_shape_ = original_reshape_shape;
  }

  const AnfNodePtr original_reshape_shape() { return this->original_reshape_shape_; }
  bool is_dynamic_shape() { return this->is_dynamic_shape_; }
  Status Init(const TensorLayout &from, const TensorLayout &to, const RankList &dev_list);
  RedistributionOpListPtr InferTensorRedistributionOperatorList(bool is_cost_model = false);
  std::vector<RedistributionOpListPtr> InferTensorRedistributionOperatorVirtualGraphs();
  RedistributionOpListPtr InferTensorRedistributionOperatorListForMultiDynamicReshape(bool is_cost_model = false);
  OperatorList operator_list() const { return operator_list_; }
  bool reshape_flag() const { return reshape_flag_; }
  bool IsInited() const { return this->is_inited_; }
  Status ComputeCost();
  double comm_cost() const { return comm_cost_; }
  double computation_cost() const { return computation_cost_; }
  double forward_comm_cost() const { return forward_comm_cost_; }
  double backward_comm_cost() const { return backward_comm_cost_; }
  double memory_cost() const { return memory_cost_; }
  Shape input_shape() const { return from_origin_.slice_shape().array(); }
  Status ResetLayoutTransfer() { return this->RollbackToDynamicShape(); }
  Status RollbackToDynamicShape();
  TensorLayout from_origin_layout() const { return this->from_origin_; }
  TensorLayout from_layout() const { return this->from_; }
  TensorLayout assembled_static_origin_from() const { return this->assembled_static_origin_from_; }
  TensorLayout from_origin_no_assembled() const { return this->from_origin_no_assembled_; }
  TensorLayout to_origin_no_assembled() const { return this->to_origin_no_assembled_; }
  bool IsAssembledStaticShape() const { return this->is_assembled_static_shape_; }
  RedistributionLayoutTransfer layout_transfer() const { return this->layout_transfer_; }
  AssembledDynamicDimsMapping GetDynamicDimsMapping() const { return this->dynamic_dim_mapping_; }
  void CreateAssembledDynamicMapping(const CNodePtr &cur_cnode, const AnfNodePtr &pre_cnode,
                                     const FuncGraphPtr &func_graph, int64_t redistribution_index);
  void SetVirtualRank(const int64_t virtual_rank) { virtual_rank_ = virtual_rank; }

 private:
  Status CalculateToTensorShapeUsingEnumeration(const Shape &from_tsr_shape, Shape *to_tsr_shape, const Array &factors);
  Status CalculateToTensorShape(const Shape &from_shape, const Shape &origin_to_shape, const Array &to_in_factors,
                                Shape *to_shape);
  Status CalculateFromTensorShape(Shape *from_shape, const Array &from_factors, const Shape &to_shape,
                                  const Array &to_factors);
  Status AssembleStaticTensorShape(const TensorLayout &from_in, const TensorLayout &to_in,
                                   TensorLayout *new_from_layout, TensorLayout *new_to_layout);
  void UnifyAssembledMapping();
  void UnifyAssembledMappingWithSqueezedFromShape();
  void UnifyAssembledMappingWithSameSize(const std::set<int64_t> &index_mapping);
  void UnifyAssembledMappingWithDiffSize(const std::set<int64_t> &index_mapping);
  Status InferReshape(const TensorLayout &from_layout, const TensorLayout &to_layout,
                      OperatorVector *const operator_vector, OutPutInfoVector *const output_info_vector);
  Status InferRedistribution(const TensorLayout &from_layout, const TensorLayout &to_layout,
                             OperatorVector *const operator_vector, OutPutInfoVector *const output_info_vector,
                             bool is_cost_model);
  Status ComputeConcatCost(double input_size, const Shape &attrs);
  Status ComputePermuteCost(double input_size, const Shape &attrs);
  RedistributionOpListPtr InferTensorRedistributionOperatorListUnExpand(bool is_cost_model = false);
  Status MakeFromToLayout(const TensorLayout &from, const TensorLayout &to);
  Status OperatorListIsEmpty(ConstructOperator *constructor, OperatorVector *const operator_vector,
                             OutPutInfoVector *const output_info_vector);
  RedistributionLayoutTransfer layout_transfer_;
  AssembledDynamicDimsMapping dynamic_dim_mapping_;
  TensorLayout from_origin_no_assembled_;
  TensorLayout to_origin_no_assembled_;
  TensorLayout from_origin_;
  TensorLayout to_origin_;
  TensorLayout from_;
  TensorLayout to_;
  TensorLayout assembled_static_origin_from_;
  bool is_inited_;
  RankList dev_list_;
  OperatorList operator_list_;
  bool reshape_flag_;
  // Communication cost: the sum of the forward and backward communication costs.
  double comm_cost_;
  // Forward communication cost.
  double forward_comm_cost_;
  // Backward communication cost.
  double backward_comm_cost_;
  // computation_cost_ models the time spent on computation in this tensor redistribution,
  // calculated from the inputs. It is calculated ONLY for the forward phase.
  double computation_cost_;
  // memory_cost_ models the PEAK memory cost contributed by this tensor redistribution in a
  // training iteration, calculated from the outputs.
  double memory_cost_;
  bool construct_op_flag_;
  bool keep_reshape_;
  bool expand_able_ = true;
  bool is_assembled_static_shape_ = false;
  bool is_dynamic_shape_ = false;
  ReplacementMemo from_dims_replace_memo_;
  ReplacementMemo to_dims_replace_memo_;
  AnfNodePtr pre_cnode_;
  CNodePtr next_cnode_;
  int64_t virtual_rank_ = -1;
  std::vector<int64_t> virtual_rank_list_;
  AnfNodePtr original_reshape_shape_ = nullptr;
};
}  // namespace parallel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_FRONTEND_PARALLEL_TENSOR_LAYOUT_TENSOR_REDISTRIBUTION_H_