1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "backend/optimizer/gpu/batch_norm_relu_fusion.h"
17
18 #include <memory>
19 #include <vector>
20 #include <string>
21
22 #include "backend/session/anf_runtime_algorithm.h"
23 #include "ir/primitive.h"
24 #include "utils/utils.h"
25 #include "backend/optimizer/common/helper.h"
26 #include "runtime/device/gpu/kernel_info_setter.h"
27
28 namespace mindspore {
29 namespace opt {
DefinePattern() const30 const BaseRef BatchNormReluFusion::DefinePattern() const {
31 VectorRef batch_norm = VectorRef({prim::kPrimBatchNorm, x_, scale_, bias_, mean_, var_});
32 VectorRef tuple_get = VectorRef({prim::kPrimTupleGetItem, batch_norm, index_});
33 VectorRef relu = VectorRef({prim::kPrimRelu, tuple_get});
34 return relu;
35 }
36
// Fuses a matched Relu(TupleGetItem(BatchNorm(...))) subgraph into a single
// kBatchNormWithActivation node.
//
// Parameters:
//   graph - function graph that owns the matched nodes; must be non-null and
//           must have a manager.
//   node  - the matched ReLU node; must be non-null.
//   (unnamed EquivPtr) - not used by this pass.
//
// Returns:
//   nullptr when the fusion is not applied, i.e. when
//     * the BatchNorm "is_training" attribute is false,
//     * neither the BatchNorm input 0 format is kOpFormat_NHWC nor its
//       "format" attribute is "NHWC",
//     * the device shape of input 0 is empty, or its last dimension is not a
//       multiple of kBNChannelMultipleFactor.
//   Otherwise the original TupleGetItem node, after the graph manager has been
//   asked to replace the BatchNorm node with the fused node.
//
// Throws (via MS_EXCEPTION_IF_NULL) when a required node, primitive, attribute
// or the graph manager is null.
const AnfNodePtr BatchNormReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                              const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);

  // Walk from the ReLU node down to the BatchNorm node through TupleGetItem.
  auto tuple_get_item = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  MS_EXCEPTION_IF_NULL(tuple_get_item);
  auto batch_norm = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
  MS_EXCEPTION_IF_NULL(batch_norm);

  // Fetch the primitive once and verify it before reading attributes from it.
  auto batch_norm_prim = AnfAlgo::GetCNodePrimitive(batch_norm);
  MS_EXCEPTION_IF_NULL(batch_norm_prim);

  auto is_train = batch_norm_prim->GetAttr("is_training");
  MS_EXCEPTION_IF_NULL(is_train);
  if (!GetValue<bool>(is_train)) {
    return nullptr;
  }

  auto format_attr = batch_norm_prim->GetAttr("format");
  MS_EXCEPTION_IF_NULL(format_attr);
  auto format = GetValue<std::string>(format_attr);
  // Fusion is skipped only if BOTH the selected input format and the attribute say "not NHWC".
  if (AnfAlgo::GetInputFormat(batch_norm, 0) != kOpFormat_NHWC && format != "NHWC") {
    return nullptr;
  }

  auto shape = AnfAlgo::GetInputDeviceShape(batch_norm, 0);
  // Calling back() on an empty vector is undefined behaviour, so treat an
  // empty device shape as "not fusable" instead of reading past the container.
  if (shape.empty() || shape.back() % kBNChannelMultipleFactor != 0) {
    return nullptr;
  }

  // Cast once and reuse for every input lookup.
  auto batch_norm_cnode = utils::cast<CNodePtr>(batch_norm);
  MS_EXCEPTION_IF_NULL(batch_norm_cnode);
  auto x = AnfAlgo::GetInputNode(batch_norm_cnode, 0);
  auto scale = AnfAlgo::GetInputNode(batch_norm_cnode, 1);
  auto bias = AnfAlgo::GetInputNode(batch_norm_cnode, 2);
  auto mean = AnfAlgo::GetInputNode(batch_norm_cnode, 3);
  auto var = AnfAlgo::GetInputNode(batch_norm_cnode, 4);

  MS_EXCEPTION_IF_NULL(x);
  MS_EXCEPTION_IF_NULL(scale);
  MS_EXCEPTION_IF_NULL(bias);
  MS_EXCEPTION_IF_NULL(mean);
  MS_EXCEPTION_IF_NULL(var);

  // Build the fused node with the same five inputs as the original BatchNorm.
  auto prim = std::make_shared<Primitive>(kBatchNormWithActivation);
  MS_EXCEPTION_IF_NULL(prim);
  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var};
  auto fused_batch_norm_with_relu = graph->NewCNode(inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_with_relu);

  // The fused node inherits the inferred type/shape of every BatchNorm output
  // and all of the BatchNorm node attributes.
  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm);
  outputs_type.reserve(output_num);
  outputs_shape.reserve(output_num);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm, i));
  }
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_relu.get());
  AnfAlgo::CopyNodeAttrs(batch_norm, fused_batch_norm_with_relu);

  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  manager->Replace(batch_norm, fused_batch_norm_with_relu);
  device::gpu::SetKernelInfo(fused_batch_norm_with_relu);
  // The TupleGetItem node is handed back to the pass framework
  // (presumably as the replacement for the matched ReLU node - confirm against the pass base class).
  return tuple_get_item;
}
96 } // namespace opt
97 } // namespace mindspore
98