/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/optimizer/gpu/batch_norm_relu_grad_fusion.h"

#include <memory>
#include <vector>
#include <string>

#include "backend/session/anf_runtime_algorithm.h"
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
#include "runtime/device/gpu/kernel_info_setter.h"
#include "utils/ms_context.h"

namespace mindspore {
namespace opt {
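// Match a BatchNormGrad node whose dy input is produced by a ReluGrad node.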
const BaseRef BatchNormReluGradFusion::DefinePattern() const {
  VectorRef relu_grad = VectorRef({prim::kPrimReluGrad, dy_, y_});
  VectorRef batch_norm_grad =
    VectorRef({prim::kPrimBatchNormGrad, relu_grad, x_, scale_, save_mean_, save_var_, reserve_});
  return batch_norm_grad;
}

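// Replace the matched ReluGrad + BatchNormGrad pair with a single BatchNormGradWithActivation node
// when the fusion conditions checked below are satisfied.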
const AnfNodePtr BatchNormReluGradFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                                  const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
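  // Only fuse BatchNormGrad nodes that run in training mode.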
  auto is_train = AnfAlgo::GetCNodePrimitive(node)->GetAttr("is_training");
  MS_EXCEPTION_IF_NULL(is_train);
  if (!GetValue<bool>(is_train)) {
    return nullptr;
  }
  auto format_attr = AnfAlgo::GetCNodePrimitive(node)->GetAttr("format");
  MS_EXCEPTION_IF_NULL(format_attr);
  auto format = GetValue<std::string>(format_attr);
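  // Skip the fusion in PyNative execution mode.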
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
    return nullptr;
  }
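  // The fusion is only applied for the NHWC data layout.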
  if (AnfAlgo::GetInputFormat(node, 0) != kOpFormat_NHWC && format != "NHWC") {
    return nullptr;
  }
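  // The channel dimension (last axis in NHWC) must be a multiple of kBNChannelMultipleFactor.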
  auto shape = AnfAlgo::GetInputDeviceShape(node, 0);
  if (shape.back() % kBNChannelMultipleFactor != 0) {
    return nullptr;
  }

  auto relu_grad = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  MS_EXCEPTION_IF_NULL(relu_grad);

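  // Fuse only when this BatchNormGrad is the sole user of the ReluGrad output.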
  auto outlist = GetRealNodeUsedList(graph, relu_grad);
  const size_t node_user_num_upper_bound = 2;
  if (outlist->size() >= node_user_num_upper_bound) {
    return nullptr;
  }

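  // Gather the inputs of ReluGrad and BatchNormGrad, plus the bias of the forward BatchNorm
  // that produced save_mean; all of them become inputs of the fused node.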
  auto dy = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(relu_grad), 0);
  MS_EXCEPTION_IF_NULL(dy);
  auto y = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(relu_grad), 1);
  MS_EXCEPTION_IF_NULL(y);
  auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 1);
  MS_EXCEPTION_IF_NULL(x);
  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 2);
  MS_EXCEPTION_IF_NULL(scale);
  auto save_mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 3);
  MS_EXCEPTION_IF_NULL(save_mean);
  auto save_var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 4);
  MS_EXCEPTION_IF_NULL(save_var);
  auto reserve = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 5);
  MS_EXCEPTION_IF_NULL(reserve);
  auto batch_norm = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(save_mean), 0);
  MS_EXCEPTION_IF_NULL(batch_norm);
  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 2);
  MS_EXCEPTION_IF_NULL(bias);

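  // Build the fused BatchNormGradWithActivation node, copy the inferred output types/shapes and
  // attributes from the original BatchNormGrad, and assign its GPU kernel info.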
  auto prim = std::make_shared<Primitive>(kBatchNormGradWithActivation);
  MS_EXCEPTION_IF_NULL(prim);
  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), dy, x, scale, save_mean, save_var, reserve, bias, y};
  auto fused_batch_norm_grad_with_relu = graph->NewCNode(inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_grad_with_relu);

  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  auto output_num = AnfAlgo::GetOutputTensorNum(node);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(node, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(node, i));
  }
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_grad_with_relu.get());
  AnfAlgo::CopyNodeAttrs(node, fused_batch_norm_grad_with_relu);
  device::gpu::SetKernelInfo(fused_batch_norm_grad_with_relu);
  return fused_batch_norm_grad_with_relu;
}
}  // namespace opt
}  // namespace mindspore