1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "frontend/parallel/ops_info/reshape_info.h"
18
19 #include <memory>
20 #include <vector>
21 #include <utility>
22
23 #include "frontend/parallel/device_manager.h"
24 #include "frontend/parallel/device_matrix.h"
25 #include "frontend/parallel/step_parallel.h"
26 #include "frontend/parallel/auto_parallel/graph_costmodel.h"
27 #include "utils/convert_utils.h"
28 #include "utils/log_adapter.h"
29
30 namespace mindspore {
31 namespace parallel {
CheckStrategy(const StrategyPtr & strategy)32 Status ReshapeInfo::CheckStrategy(const StrategyPtr &strategy) { return CheckStrategyValue(strategy, inputs_shape_); }
33
34 /*
35 * support parallel degree smaller than device number, set the duplicate device dimension to the first dimension of
36 * device matrix
37 * only support batch parallel reshape operator in ReID (batch parallel degree can be smaller than device number)
38 */
InferDevMatrixShape()39 Status ReshapeInfo::InferDevMatrixShape() {
40 Strategys stra = strategy_->GetInputDim();
41 input_strategy_ = stra.at(0);
42 dev_matrix_shape_ = stra.at(0);
43 return SUCCESS;
44 }
45
46 /*
47 * there is no Parameter for Reshape Primitive, so no need to do allreduce
48 */
InferMirrorOps()49 Status ReshapeInfo::InferMirrorOps() {
50 mirror_ops_.clear();
51 Shape input_tensor_map = input_layout_.tensor_map().array();
52 std::vector<Group> input_group;
53 if (CreateGroupByTensorMap(input_tensor_map, &input_group) != SUCCESS) {
54 MS_LOG(ERROR) << name_ << ": Infer MirrorOps failed.";
55 return FAILED;
56 }
57
58 OperatorVector op_for_input;
59 if (input_group.empty()) {
60 MS_LOG(INFO) << name_ << ": The mirror ops is empty.";
61 return SUCCESS;
62 }
63 if (!input_group.empty()) {
64 op_for_input = CreateMirrorOps(input_group[0].name(), input_group[0].GetDevNum());
65 std::string group_name = input_group[0].name();
66 MS_LOG(INFO) << name_ << ": Create the mirror ops for input_a success, group is " << group_name;
67 }
68 mirror_ops_.push_back(op_for_input);
69 OperatorVector op_for_input_empty;
70 mirror_ops_.push_back(op_for_input_empty);
71
72 return SUCCESS;
73 }
74
75 /*
76 * there is no reduction dimension for forward computation of Reshape Primitive, so no need to do allreduce
77 */
InferForwardCommunication()78 Status ReshapeInfo::InferForwardCommunication() { return SUCCESS; }
79
80 /*
81 * get shape input of Reshape Primitive
82 * the result is saved in parameter_input_v_
83 * not support -1
84 */
GetParameterInput()85 Status ReshapeInfo::GetParameterInput() {
86 if (input_value_[1] == nullptr) {
87 MS_LOG(ERROR) << name_ << ": input_value_[1] is nullptr.";
88 return FAILED;
89 }
90 std::vector<ValuePtr> elements;
91 ValueTuplePtr dim_tuple = input_value_[1]->cast<ValueTuplePtr>();
92 if (dim_tuple == nullptr) {
93 MS_LOG(ERROR) << name_ << ": Input_value_[1] must be ValueTuplePtr.";
94 return FAILED;
95 }
96 elements = dim_tuple->value();
97 if (elements.size() != outputs_shape_[0].size()) {
98 MS_LOG(ERROR) << name_ << ": Elements size must equal to outputs shape[0] size.";
99 return FAILED;
100 }
101
102 for (auto &element : elements) {
103 MS_EXCEPTION_IF_NULL(element);
104 if (element->isa<Int64Imm>()) {
105 int64_t axis = element->cast<Int64ImmPtr>()->value();
106 parameter_input_v_.push_back(axis);
107 } else {
108 MS_LOG(ERROR) << name_ << ": The value of axis must be int32.";
109 return FAILED;
110 }
111 }
112 return SUCCESS;
113 }
114
ComputeReplaceOp()115 Status ReshapeInfo::ComputeReplaceOp() {
116 RankList dev_list = stage_device_list();
117 TensorRedistribution tensor_redistribution(!is_generating_costs_, true);
118 if (tensor_redistribution.Init(input_layout_, output_layout_, dev_list) == FAILED) {
119 if (is_generating_costs_) {
120 MS_LOG(DEBUG) << name_ << ": tensor_redistribution init failed.";
121 } else {
122 MS_LOG(ERROR) << name_ << ": tensor_redistribution init failed.";
123 }
124 return FAILED;
125 }
126 MS_LOG(DEBUG) << name_ << ": input " << input_layout_.ToString();
127 MS_LOG(DEBUG) << name_ << ": output " << output_layout_.ToString();
128 MS_LOG(DEBUG) << name_ << ": dev_list " << dev_list.size();
129 if (is_skip_) {
130 ConstructOperator constructor;
131 replace_op_ = constructor.SkipRedisReshapeOP(output_layout_.slice_shape().array());
132 replace_op_info_.clear();
133 MS_LOG(INFO) << "skip reshape redistribution and reshape slice_shape is "
134 << ShapeToString(output_layout_.slice_shape().array());
135 } else {
136 RedistributionOpListPtr redistribution_oplist_ptr = tensor_redistribution.InferTensorRedistributionOperatorList();
137 if (redistribution_oplist_ptr == nullptr) {
138 if (is_generating_costs_) {
139 MS_LOG(DEBUG) << name_ << "InferTensorRedistribution failed.";
140 } else {
141 MS_LOG(ERROR) << name_ << "InferTensorRedistribution failed.";
142 }
143 return FAILED;
144 }
145 replace_op_ = redistribution_oplist_ptr->first;
146 replace_op_info_ = redistribution_oplist_ptr->second;
147 }
148 MS_LOG(DEBUG) << name_ << ": replace op size = " << replace_op_.size();
149 return SUCCESS;
150 }
151
152 /*
153 * the first dimension of input tensor map and output tensor map is set to the last dimension of device arrangement,
154 * all other dimension is set to None
155 * only support batch parallel reshape operator in ReID (batch parallel degree can be smaller than device number)
156 */
InferTensorMap()157 Status ReshapeInfo::InferTensorMap() {
158 if ((inputs_shape_.size() != 1) || (outputs_shape_.size() != 1)) {
159 MS_LOG(ERROR) << name_ << ": inputs shape and outputs shape size must be 1. inputs shape and outputs shape are "
160 << inputs_shape_.size() << " and " << outputs_shape_.size();
161 return FAILED;
162 }
163
164 Shape tensor_map_index_input;
165 for (size_t j = 0; j < inputs_shape_[0].size(); ++j) {
166 tensor_map_index_input.push_back((int64_t)(inputs_shape_[0].size() - j - 1));
167 }
168 inputs_tensor_map_.push_back(tensor_map_index_input);
169
170 Shape tensor_map_index_output;
171 for (size_t j = 0; j < outputs_shape_[0].size(); ++j) {
172 tensor_map_index_output.push_back(MAP_NONE);
173 }
174 outputs_tensor_map_.push_back(tensor_map_index_output);
175 return SUCCESS;
176 }
177
178 /*
179 * the output tensor strategy is the same as input tensor strategy
180 * only support batch parallel reshape operator in ReID (batch parallel degree can be smaller than device number)
181 */
GetOutputsStrategy()182 Strategys ReshapeInfo::GetOutputsStrategy() {
183 Strategys outputs_strategy;
184 Dimensions strategy;
185 for (size_t j = 0; j < outputs_shape_[0].size(); ++j) {
186 strategy.push_back(1);
187 }
188 outputs_strategy.push_back(strategy);
189 return outputs_strategy;
190 }
191
InferTensorLayout(TensorLayouts * inputs_layout,TensorLayouts * outputs_layout)192 Status ReshapeInfo::InferTensorLayout(TensorLayouts *inputs_layout, TensorLayouts *outputs_layout) {
193 if (inputs_layout == nullptr || outputs_layout == nullptr) {
194 MS_LOG(ERROR) << name_ << ": InferTensorLayout: the layout is null.";
195 return FAILED;
196 }
197 Arrangement dev_matrix;
198 Status status = dev_matrix.Init(dev_matrix_shape_);
199 if (status != Status::SUCCESS) {
200 return status;
201 }
202 // infer input tensor info
203 Shape shape_array_in = inputs_shape_.at(0);
204 TensorMap tensor_map_array_in = inputs_tensor_map_.at(0);
205 TensorLayout tensor_layout_in;
206 Map tensor_map_in;
207 status = tensor_map_in.Init(tensor_map_array_in);
208 if (status != Status::SUCCESS) {
209 return status;
210 }
211 Arrangement shape_in;
212 status = shape_in.Init(shape_array_in);
213 if (status != Status::SUCCESS) {
214 return status;
215 }
216 (void)tensor_layout_in.Init(dev_matrix, tensor_map_in, shape_in);
217 inputs_layout->push_back(tensor_layout_in);
218 // infer output tensor info
219 Shape shape_array_out = outputs_shape_.at(0);
220
221 TensorMap tensor_map_array_out = outputs_tensor_map_.at(0);
222 TensorLayout tensor_layout_out;
223 Map tensor_map_out;
224 status = tensor_map_out.Init(tensor_map_array_out);
225 if (status != Status::SUCCESS) {
226 return status;
227 }
228 Arrangement shape_out;
229 status = shape_out.Init(shape_array_out);
230 if (status != Status::SUCCESS) {
231 return status;
232 }
233 (void)tensor_layout_out.Init(dev_matrix, tensor_map_out, shape_out);
234 outputs_layout->push_back(tensor_layout_out);
235
236 input_layout_ = tensor_layout_in;
237 output_layout_ = tensor_layout_out;
238 return SUCCESS;
239 }
240
InferTensorInfo()241 Status ReshapeInfo::InferTensorInfo() {
242 // skip reshape infer if skip_redistribution is true
243 if (is_skip_) {
244 TensorLayout layout;
245 Shape shape;
246 Shape slice_shape;
247 layout.set_skip_redistribution(true);
248 TensorInfo tensor_info_in(layout, shape, slice_shape);
249 inputs_tensor_info_.push_back(tensor_info_in);
250 outputs_tensor_info_.push_back(tensor_info_in);
251 MS_LOG(DEBUG) << name() << "skip redistribution reshape InferTensorInfo";
252 return SUCCESS;
253 }
254
255 Shapes inputs_slice_shape, outputs_slice_shape;
256 Strategys inputs_strategy = strategy_->GetInputDim();
257 Strategys outputs_strategy = GetOutputsStrategy();
258 if (InferSliceShape(inputs_strategy, outputs_strategy, &inputs_slice_shape, &outputs_slice_shape) != SUCCESS) {
259 return FAILED;
260 }
261
262 TensorLayouts inputs_layout, outputs_layout;
263 if (InferTensorLayout(&inputs_layout, &outputs_layout) != SUCCESS) {
264 return FAILED;
265 }
266 TensorLayout tensor_layout_in = inputs_layout.at(0);
267 TensorLayout tensor_layout_out = outputs_layout.at(0);
268 Shape shape_array_in = inputs_shape_.at(0);
269 Shape slice_shape_in = inputs_slice_shape.at(0);
270 Shape shape_array_out = outputs_shape_.at(0);
271 Shape slice_shape_out = outputs_slice_shape.at(0);
272 TensorInfo tensor_info_in(tensor_layout_in, shape_array_in, slice_shape_in);
273 TensorInfo tensor_info_out(tensor_layout_out, shape_array_out, slice_shape_out);
274 inputs_tensor_info_.push_back(tensor_info_in);
275 outputs_tensor_info_.push_back(tensor_info_out);
276 return SUCCESS;
277 }
278
InferTensorInfoByLayout()279 void ReshapeInfo::InferTensorInfoByLayout() {
280 TensorInfo tensor_info_in(input_layout_);
281 TensorInfo tensor_info_out(output_layout_);
282 inputs_tensor_info_.push_back(tensor_info_in);
283 outputs_tensor_info_.push_back(tensor_info_out);
284 }
285
286 /*
287 * compute parameter_input_v_ during this method
288 */
GetAttrs()289 Status ReshapeInfo::GetAttrs() { return GetParameterInput(); }
290
// Cache the device count of the current pipeline stage in dev_num_.
void ReshapeInfo::device_number() {
  dev_num_ = stage_device_size_;
  MS_ASSERT(dev_num_ > 0);
}
295
InferDefaultLayout(const Shape & shape,TensorLayout * const layout)296 Status ReshapeInfo::InferDefaultLayout(const Shape &shape, TensorLayout *const layout) {
297 Shape tensor_map_index;
298 for (size_t i = 0; i < shape.size(); i++) {
299 tensor_map_index.push_back(MAP_NONE);
300 }
301 Status status = layout->InitFromVector({dev_num_}, tensor_map_index, shape);
302 if (status != Status::SUCCESS) {
303 MS_LOG(ERROR) << name_ << ": InferDefaultLayout failed.";
304 return status;
305 }
306 return Status::SUCCESS;
307 }
308
Init(const StrategyPtr & strategy)309 Status ReshapeInfo::Init(const StrategyPtr &strategy) {
310 auto reshape_skip_redis_iter = attrs_.find(SKIP_REDISTRIBUTION);
311 if (reshape_skip_redis_iter != attrs_.end()) {
312 MS_EXCEPTION_IF_NULL(reshape_skip_redis_iter->second);
313 if (!reshape_skip_redis_iter->second->isa<BoolImm>()) {
314 MS_LOG(ERROR) << name_ << ": skip_redistribution is not a bool.";
315 return FAILED;
316 }
317 is_skip_ = reshape_skip_redis_iter->second->cast<BoolImmPtr>()->value();
318 }
319
320 ResetQueueMember();
321 device_number();
322 if (strategy) {
323 if (InitWithAutoRepeatCalc(strategy) != SUCCESS) {
324 MS_LOG(ERROR) << name_ << ": Init failed.";
325 return FAILED;
326 }
327 } else {
328 if (!input_layout_set_flag_) {
329 MS_ASSERT(inputs_shape_.size() == 1);
330 Status status = InferDefaultLayout(inputs_shape_.at(0), &input_layout_);
331 if (status != SUCCESS) {
332 MS_LOG(ERROR) << name_ << ": infer input default layout failed.";
333 return status;
334 }
335 }
336 if (!output_layout_set_flag_) {
337 MS_ASSERT(output_layout_.size() == 1);
338 Status status = InferDefaultLayout(outputs_shape_.at(0), &output_layout_);
339 if (status != SUCCESS) {
340 MS_LOG(ERROR) << name_ << ": infer output default layout failed.";
341 return status;
342 }
343 }
344 inputs_tensor_map_.push_back(input_layout_.tensor_map().array());
345 outputs_tensor_map_.push_back(output_layout_.tensor_map().array());
346 InferTensorInfoByLayout();
347 // change dev_matrix_shape_ to input_layout_ device_arrangement before InferMirrorOps
348 dev_matrix_shape_ = input_layout_.device_arrangement().array();
349 if (InferMirrorOps() != SUCCESS) {
350 MS_LOG(ERROR) << name_ << ": InferMirrorOps failed.";
351 return FAILED;
352 }
353 // change dev_matrix_shape_ to output_layout_ device_arrangement before InferVirtualDivOps
354 dev_matrix_shape_ = output_layout_.device_arrangement().array();
355 if (InferVirtualDivOps() != SUCCESS) {
356 MS_LOG(ERROR) << name_ << ": InferVirtualDivOps failed.";
357 return FAILED;
358 }
359 }
360 Status status = ComputeReplaceOp();
361 if (status != SUCCESS) {
362 MS_LOG(ERROR) << name_ << ": ComputeReplaceOp failed.";
363 return status;
364 }
365 return SUCCESS;
366 }
367
InitForCostModel(const StrategyPtr & strategy)368 Status ReshapeInfo::InitForCostModel(const StrategyPtr &strategy) {
369 if (InitForCostModelWithAutoRepeatCalc(strategy) != SUCCESS) {
370 MS_LOG(ERROR) << name_ << ": Init for cost model failed.";
371 return FAILED;
372 }
373
374 MS_LOG(INFO) << name_ << ": Init for cost model success.";
375 return SUCCESS;
376 }
377
SetCostUnderStrategy(const mindspore::parallel::StrategyPtr & strategy)378 Status ReshapeInfo::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr &strategy) {
379 return SetCostUnderStrategyBase(strategy);
380 }
381
SetCostForReshapeWithParameter()382 void ReshapeInfo::SetCostForReshapeWithParameter() {
383 size_t success = 0;
384 for (auto &sp : sp_vector_) {
385 if (SetCostUnderStrategy(sp) == SUCCESS) {
386 success++;
387 MS_LOG(INFO) << name_ << ": Successfully generated " << success << " strategy.";
388 PrintStrategy(sp);
389 }
390 }
391 }
392
SetCostForReshape(const mindspore::parallel::StrategyPtr & strategy)393 void ReshapeInfo::SetCostForReshape(const mindspore::parallel::StrategyPtr &strategy) {
394 MS_EXCEPTION_IF_NULL(strategy);
395 int64_t stage_id = strategy->GetInputStage();
396 double computation_cost =
397 operator_cost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, stage_id);
398 double communication_cost = operator_cost()->GetCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id);
399 const auto gamma = CostModelContext::GetInstance()->costmodel_gamma();
400 std::shared_ptr<Cost> result = std::make_shared<Cost>(computation_cost, communication_cost);
401 result->communication_without_parameter_ =
402 operator_cost()->GetForwardCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id);
403 result->communication_with_partial_para_ =
404 result->communication_without_parameter_ + gamma * (communication_cost - result->communication_without_parameter_);
405
406 // Breaking ties for preferring data parallelization
407 BreakingTiesForPerferringDataParallel(strategy, result);
408 // refine communication cost calculation for practice
409 RefineForPracticalCost(result, false);
410
411 std::shared_ptr<StrategyWithCost> swc =
412 std::make_shared<StrategyWithCost>(strategy, inputs_tensor_info_, outputs_tensor_info_);
413 swc->cost_list.push_back(result);
414 strategy_cost_.emplace_back(swc);
415 }
416
GenerateStrategies(int64_t stage_id)417 Status ReshapeInfo::GenerateStrategies(int64_t stage_id) {
418 if (GetAttrs() != SUCCESS) {
419 MS_LOG(ERROR) << name_ << ": GetAttrs failed.";
420 return FAILED;
421 }
422 if ((inputs_shape_.size() != 1) || (outputs_shape_.size() != 1)) {
423 MS_LOG(ERROR) << name_ << ": Inputs shape size or outputs shape size is wrong, " << inputs_shape_.size() << ", "
424 << outputs_shape_.size();
425 return FAILED;
426 }
427 Shape input0_split;
428 (void)input0_split.insert(input0_split.end(), inputs_shape_[0].size(), 1);
429 Shapes splittable_inputs = {input0_split};
430 // strategy used only in the input node is parameter,
431 // in other case, use the input node's output_layout as input_layout.
432 if (GenerateStrategiesForIndependentInputs(stage_id, inputs_shape_, splittable_inputs, &sp_vector_) != SUCCESS) {
433 MS_LOG(ERROR) << name_ << ": GenerateStrategiesForIndependentInputs failed.";
434 return FAILED;
435 }
436 return SUCCESS;
437 }
438
GenerateOpStrategies(int64_t)439 std::vector<StrategyPtr> ReshapeInfo::GenerateOpStrategies(int64_t) {
440 std::vector<StrategyPtr> sp_vector;
441 return sp_vector;
442 }
443
GenetateStrategyCosts(const std::vector<std::shared_ptr<StrategyWithCost>> & pre_stra_costs,const std::vector<std::shared_ptr<StrategyWithCost>> & next_stra_costs,int64_t out_index,int64_t in_index,bool is_prev_param,bool is_next_reshape)444 Status ReshapeInfo::GenetateStrategyCosts(const std::vector<std::shared_ptr<StrategyWithCost>> &pre_stra_costs,
445 const std::vector<std::shared_ptr<StrategyWithCost>> &next_stra_costs,
446 int64_t out_index, int64_t in_index, bool is_prev_param,
447 bool is_next_reshape) {
448 is_generating_costs_ = true;
449 for (auto pre_stra_cost : pre_stra_costs) {
450 std::vector<TensorInfo> pre_out_tensor_infos;
451 if (is_prev_param) {
452 pre_out_tensor_infos = pre_stra_cost->inputs_ptr;
453 } else {
454 pre_out_tensor_infos = pre_stra_cost->outputs_ptr;
455 }
456 if (pre_out_tensor_infos.size() <= LongToSize(out_index)) {
457 MS_LOG(ERROR) << "out_index is out of range of the tensor_infos in setting reshape's input_layout";
458 return FAILED;
459 }
460 TensorInfo pre_out_tensor_info = pre_out_tensor_infos[LongToSize(out_index)];
461 SetInputLayout(pre_out_tensor_info.tensor_layout());
462 // infer pre_node output strategy from output_layout.
463 Dimensions stra = pre_out_tensor_info.InferStrategy();
464 if (stra.empty()) {
465 MS_LOG(ERROR) << "Infer strategy by tensor_info failed";
466 return FAILED;
467 }
468 Strategys stra_inputs = {stra};
469 StrategyPtr reshape_stra = std::make_shared<Strategy>(pre_stra_cost->strategy_ptr->GetInputStage(), stra_inputs);
470 if (is_next_reshape) {
471 SetOutputLayout(pre_out_tensor_info.tensor_layout());
472 ResetQueueMember();
473 InferTensorInfoByLayout();
474 SetCostForReshape(reshape_stra);
475 } else if (next_stra_costs.empty()) {
476 if (Init(nullptr) == FAILED) {
477 MS_LOG(ERROR) << "Failure:operator reshape init failed";
478 return FAILED;
479 }
480 SetCostForReshape(reshape_stra);
481 continue;
482 }
483 for (auto next_stra_cost : next_stra_costs) {
484 std::vector<TensorInfo> next_in_tensor_infos = next_stra_cost->inputs_ptr;
485 if (next_in_tensor_infos.size() <= LongToSize(in_index)) {
486 MS_LOG(ERROR) << "in_index is out of range of the tensor_infos in setting reshape's output_layout";
487 return FAILED;
488 }
489 TensorInfo next_in_tensor_info = next_in_tensor_infos[LongToSize(in_index)];
490
491 SetOutputLayout(next_in_tensor_info.tensor_layout());
492 ResetQueueMember();
493 InferTensorInfoByLayout();
494 SetCostForReshape(reshape_stra);
495 }
496 }
497 is_generating_costs_ = false;
498 if (strategy_cost_.empty()) {
499 return FAILED;
500 }
501 return SUCCESS;
502 }
503 } // namespace parallel
504 } // namespace mindspore
505