• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "backend/optimizer/gpu/post_batch_norm_add_relu_fusion.h"
17 
18 #include <memory>
19 #include <vector>
20 #include <string>
21 
22 #include "backend/session/anf_runtime_algorithm.h"
23 #include "ir/primitive.h"
24 #include "utils/utils.h"
25 #include "backend/optimizer/common/helper.h"
26 #include "runtime/device/gpu/kernel_info_setter.h"
27 
28 namespace mindspore {
29 namespace opt {
DefinePattern() const30 const BaseRef PostBatchNormAddReluFusion::DefinePattern() const {
31   VectorRef batch_norm = VectorRef({prim::kPrimBatchNorm, x_, scale_, bias_, mean_, var_});
32   VectorRef tuple_get_item = VectorRef({prim::kPrimTupleGetItem, batch_norm, index_});
33   VectorRef tensor_add = VectorRef({prim::kPrimAdd, z_, tuple_get_item});
34   VectorRef relu = VectorRef({prim::kPrimRelu, tensor_add});
35   return relu;
36 }
37 
Process(const FuncGraphPtr & graph,const AnfNodePtr & node,const EquivPtr &) const38 const AnfNodePtr PostBatchNormAddReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
39                                                      const EquivPtr &) const {
40   MS_EXCEPTION_IF_NULL(graph);
41   MS_EXCEPTION_IF_NULL(node);
42 
43   auto tensor_add = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
44   MS_EXCEPTION_IF_NULL(tensor_add);
45   auto tuple_get_item = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tensor_add), 1);
46   MS_EXCEPTION_IF_NULL(tuple_get_item);
47   auto batch_norm = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
48   MS_EXCEPTION_IF_NULL(batch_norm);
49   auto is_train = AnfAlgo::GetCNodePrimitive(batch_norm)->GetAttr("is_training");
50   MS_EXCEPTION_IF_NULL(is_train);
51   if (!GetValue<bool>(is_train)) {
52     return nullptr;
53   }
54   auto format_attr = AnfAlgo::GetCNodePrimitive(batch_norm)->GetAttr("format");
55   MS_EXCEPTION_IF_NULL(format_attr);
56   auto format = GetValue<std::string>(format_attr);
57   if (AnfAlgo::GetInputFormat(batch_norm, 0) != kOpFormat_NHWC && format != "NHWC") {
58     return nullptr;
59   }
60   auto shape = AnfAlgo::GetInputDeviceShape(batch_norm, 0);
61   if (shape.back() % kBNChannelMultipleFactor != 0) {
62     return nullptr;
63   }
64 
65   auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 0);
66   auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 1);
67   auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 2);
68   auto mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 3);
69   auto var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 4);
70   auto z = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tensor_add), 0);
71 
72   MS_EXCEPTION_IF_NULL(x);
73   MS_EXCEPTION_IF_NULL(scale);
74   MS_EXCEPTION_IF_NULL(bias);
75   MS_EXCEPTION_IF_NULL(mean);
76   MS_EXCEPTION_IF_NULL(var);
77   MS_EXCEPTION_IF_NULL(z);
78 
79   auto prim = std::make_shared<Primitive>(kBatchNormWithAddAndActivation);
80   MS_EXCEPTION_IF_NULL(prim);
81   std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var, z};
82   auto fused_batch_norm_with_add_relu = graph->NewCNode(inputs);
83   MS_EXCEPTION_IF_NULL(fused_batch_norm_with_add_relu);
84 
85   std::vector<TypeId> outputs_type;
86   std::vector<std::vector<size_t>> outputs_shape;
87   auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm);
88   for (size_t i = 0; i < output_num; i++) {
89     outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm, i));
90     outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm, i));
91   }
92   AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_add_relu.get());
93   AnfAlgo::CopyNodeAttrs(batch_norm, fused_batch_norm_with_add_relu);
94 
95   auto manager = graph->manager();
96   MS_EXCEPTION_IF_NULL(manager);
97   manager->Replace(batch_norm, fused_batch_norm_with_add_relu);
98   device::gpu::SetKernelInfo(fused_batch_norm_with_add_relu);
99   return tuple_get_item;
100 }
101 }  // namespace opt
102 }  // namespace mindspore
103