1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/delegate/npu/npu_subgraph.h"
18 #include <set>
19 #include <unordered_map>
20 #include <utility>
21 #include "include/errorcode.h"
22 #include "include/graph/operator.h"
23 #include "include/graph/graph.h"
24 #include "include/graph/op/const_defs.h"
25 #include "include/graph/model.h"
26 #include "include/hiai_ir_build.h"
27 #include "include/version.h"
28 #include "src/common/utils.h"
29 #include "src/delegate/npu/npu_converter_utils.h"
30 namespace mindspore {
// Op types whose weight tensors must NOT be wrapped in a generic hiai::op::Const
// by BuildNPUInputOp (see the npu_specific_weight_nodes lookup there) —
// presumably their own converters handle weights specially; confirm per op.
static std::set<mindspore::schema::PrimitiveType> npu_specific_weight_nodes = {
  schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_PadFusion,
  schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm,
  schema::PrimitiveType_TileFusion};
35
~NPUSubGraph()36 NPUSubGraph::~NPUSubGraph() {
37 subgraph_input_ops_.clear();
38 subgraph_output_ops_.clear();
39 out_tensor_sorted_.clear();
40 for (auto op : op_buffer_) {
41 delete op;
42 }
43 if (executor_ != nullptr) {
44 delete executor_;
45 }
46 op_buffer_.clear();
47 }
48
set_input(mindspore::MSTensor in_tensor,int index)49 void NPUSubGraph::set_input(mindspore::MSTensor in_tensor, int index) {
50 MS_ASSERT(index < inputs_.size());
51 auto origin_tensor = inputs_[index];
52 // only in_ops_ input tensors list used in execute function
53 for (auto op : in_ops_) {
54 for (size_t i = 0; i < op->inputs().size(); i++) {
55 if (op->inputs()[i] == origin_tensor) {
56 op->set_input(in_tensor, i);
57 }
58 }
59 }
60 this->inputs_[index] = in_tensor;
61 }
62
set_output(mindspore::MSTensor out_tensor,int index)63 void NPUSubGraph::set_output(mindspore::MSTensor out_tensor, int index) {
64 MS_ASSERT(index < out_tensor_sorted_.size());
65 auto origin_tensor = outputs_[index];
66 for (size_t i = 0; i < out_tensor_sorted_.size(); i++) {
67 if (out_tensor_sorted_[i] == origin_tensor) {
68 out_tensor_sorted_[i] = out_tensor;
69 }
70 }
71 outputs_[index] = out_tensor;
72 }
73
GetGraphInOutOps()74 int NPUSubGraph::GetGraphInOutOps() {
75 for (auto in_tensor : this->inputs()) {
76 for (auto op : npu_ops_) {
77 if (find(op->inputs().begin(), op->inputs().end(), in_tensor) != op->inputs().end() &&
78 find(in_ops_.begin(), in_ops_.end(), op) == in_ops_.end()) {
79 in_ops_.push_back(op);
80 }
81 }
82 }
83 if (in_ops_.empty()) {
84 MS_LOG(ERROR) << "Can't find the input ops for npu sub graph.";
85 return RET_ERROR;
86 }
87
88 for (auto out_tensor : this->outputs()) {
89 for (auto op : npu_ops_) {
90 if (find(op->outputs().begin(), op->outputs().end(), out_tensor) != op->outputs().end() &&
91 find(out_ops_.begin(), out_ops_.end(), op) == out_ops_.end()) {
92 out_ops_.push_back(op);
93 }
94 }
95 }
96 if (out_ops_.empty()) {
97 MS_LOG(ERROR) << "Can't find the output ops for npu sub graph.";
98 return RET_ERROR;
99 }
100 return RET_OK;
101 }
102
FindPreOps(NPUOp * cur_op)103 std::vector<NPUOp *> NPUSubGraph::FindPreOps(NPUOp *cur_op) {
104 std::vector<NPUOp *> in_ops;
105 for (auto in_tensor : cur_op->inputs()) {
106 for (auto op : npu_ops_) {
107 if (find(op->outputs().begin(), op->outputs().end(), in_tensor) != op->outputs().end()) {
108 in_ops.push_back(op);
109 }
110 }
111 }
112 return in_ops;
113 }
114
// Builds the HiAI offline-model (OM) buffer for this subgraph:
// converts the ops to an IR graph, wraps it in a ge::Model and compiles it
// with HiaiIrBuild. Returns nullptr on any failure.
std::shared_ptr<domi::ModelBufferData> NPUSubGraph::BuildIRModel() {
  ge::Graph graph("NPUGraph");

  // Create Data/Const ops and wire every op's NPU inputs.
  auto ret = BuildNPUInputOp();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Build NPU input operator failed.";
    return nullptr;
  }
  // Collect the graph output operators into subgraph_output_ops_.
  ret = BuildNPUOutputOp();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Build NPU output operator failed.";
    return nullptr;
  }
  graph.SetInputs(subgraph_input_ops_).SetOutputs(subgraph_output_ops_);
  ge::Model model(GetOMModelName(), mindspore::lite::Version());
  model.SetGraph(graph);
  domi::HiaiIrBuild ir_build;
  auto om_model_buff = std::make_shared<domi::ModelBufferData>();
  if (om_model_buff == nullptr) {
    MS_LOG(ERROR) << "OM model buffer is nullptr.";
    return nullptr;
  }
  // CreateModelBuff allocates the buffer; BuildIRModel fills it with the
  // compiled model. The buffer is released here only on build failure —
  // on success ownership passes to the caller via the shared_ptr.
  if (!ir_build.CreateModelBuff(model, *om_model_buff)) {
    MS_LOG(ERROR) << "Create model buffer failed.";
    return nullptr;
  }
  if (!ir_build.BuildIRModel(model, *om_model_buff)) {
    MS_LOG(ERROR) << "Build IR model failed.";
    ir_build.ReleaseModelBuff(*om_model_buff);
    return nullptr;
  }
  return om_model_buff;
}
148
Execute()149 int NPUSubGraph::Execute() { return executor_->Run(inputs(), out_tensor_sorted_, in_ops_); }
150
BuildNPUInputOp()151 int NPUSubGraph::BuildNPUInputOp() {
152 int count = 0;
153 subgraph_input_ops_.clear();
154 op_buffer_.clear();
155 for (auto op : this->npu_ops_) {
156 std::vector<ge::Operator *> input_ops;
157 std::unordered_map<int, std::pair<ge::Operator *, int>> index2_multi_out_index;
158 for (int i = 0; i < op->inputs().size(); ++i) {
159 auto in_tensor = op->inputs()[i];
160 if (IsSubGraphInputTensor(in_tensor)) {
161 auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
162 hiai::op::Data *data = nullptr;
163 data = ConverterToNPUData(in_tensor, tensor_name);
164 subgraph_input_ops_.push_back(*data);
165 input_ops.push_back(data);
166 op_buffer_.push_back(data);
167 continue;
168 }
169
170 bool is_weight_tensor = true;
171 auto pre_ops = FindPreOps(op);
172 for (auto pre_op : pre_ops) {
173 if (find(pre_op->outputs().begin(), pre_op->outputs().end(), in_tensor) != pre_op->outputs().end()) {
174 // input come from npu
175 auto npu_op = reinterpret_cast<NPUOp *>(pre_op)->GetNPUOp();
176 if (npu_op == nullptr) {
177 MS_LOG(ERROR) << pre_op->name() << "'s npu operator is nullptr.";
178 return RET_ERROR;
179 }
180 input_ops.push_back(npu_op);
181 if (pre_op->outputs().size() != 1) { // in_op has multi output, we record which output we want.
182 int out_index =
183 std::find(pre_op->outputs().begin(), pre_op->outputs().end(), in_tensor) - pre_op->outputs().begin();
184 index2_multi_out_index[i] = {npu_op, out_index};
185 }
186 is_weight_tensor = false;
187 break;
188 }
189 }
190
191 // weight tensor
192 if (is_weight_tensor) {
193 if (npu_specific_weight_nodes.find(op->type()) == npu_specific_weight_nodes.end()) {
194 auto name = op->name() + "_" + std::to_string(count++);
195 auto weight_const = new (std::nothrow) hiai::op::Const(op->name() + "_" + std::to_string(count++));
196 if (weight_const == nullptr) {
197 MS_LOG(ERROR) << "New weight const failed.";
198 return RET_ERROR;
199 }
200 auto weight_tensor = ConverterToNPUTensor(in_tensor);
201 weight_const->set_attr_value(weight_tensor);
202 input_ops.push_back(weight_const);
203 op_buffer_.push_back(weight_const);
204 }
205 }
206 }
207 // set input to NPU
208 int ret =
209 reinterpret_cast<NPUOp *>(op)->SetNPUInputs(op->inputs(), op->outputs(), input_ops, index2_multi_out_index);
210 if (ret != RET_OK) {
211 MS_LOG(ERROR) << op->name() << " set npu inputs failed.";
212 return RET_ERROR;
213 }
214 }
215 return RET_OK;
216 }
217
IsSubGraphInputTensor(mindspore::MSTensor input)218 bool NPUSubGraph::IsSubGraphInputTensor(mindspore::MSTensor input) {
219 if (find(this->inputs().begin(), this->inputs().end(), input) != this->inputs().end()) {
220 return true;
221 }
222 return false;
223 }
224
GetNPUOperators(const vector<NPUOp * > & ops)225 int NPUSubGraph::GetNPUOperators(const vector<NPUOp *> &ops) {
226 subgraph_output_ops_.reserve(ops.size());
227 for (int i = 0; i < ops.size(); i++) {
228 auto npu_op = reinterpret_cast<NPUOp *>(ops[i])->GetNPUOp();
229 if (npu_op == nullptr) {
230 MS_LOG(ERROR) << "Get NPU operator for " << ops[i]->name() << " failed.";
231 return RET_ERROR;
232 }
233 subgraph_output_ops_.push_back(*npu_op);
234 }
235 return RET_OK;
236 }
237
BuildNPUOutputOp()238 int NPUSubGraph::BuildNPUOutputOp() {
239 subgraph_output_ops_.clear();
240 auto ret = GetNPUOperators(out_ops_);
241 if (ret != RET_OK) {
242 MS_LOG(ERROR) << "Get NPU operators failed.";
243 return RET_ERROR;
244 }
245 out_tensor_sorted_.resize(outputs().size());
246 int i = 0;
247 for (auto node : out_ops_) {
248 for (auto tensor : node->outputs()) {
249 if (std::find(outputs().begin(), outputs().end(), tensor) != outputs().end())
250 this->out_tensor_sorted_[i++] = tensor;
251 }
252 }
253 if (subgraph_output_ops_.empty()) {
254 MS_LOG(ERROR) << "NPU subgraph output op is empty.";
255 return RET_ERROR;
256 }
257 return RET_OK;
258 }
259
GetOMModelName()260 std::string NPUSubGraph::GetOMModelName() { return this->name_ + ".om"; }
261
Init()262 int NPUSubGraph::Init() {
263 auto ret = GetGraphInOutOps();
264 if (ret != RET_OK) {
265 MS_LOG(ERROR) << "Get NPU subgraph input and output ops failed.";
266 return RET_ERROR;
267 }
268 name_ = "kNpuSubGraph" + std::to_string(npu_manager_->SubGraphIndex());
269 auto model_buffer_data = BuildIRModel();
270 if (model_buffer_data == nullptr) {
271 MS_LOG(ERROR) << "Build IR model failed.";
272 return RET_ERROR;
273 }
274
275 MS_ASSERT(npu_manager_ != nullptr);
276 npu_manager_->AddModel(model_buffer_data, GetOMModelName(), npu_manager_->GetFrequency());
277
278 executor_ = new (std::nothrow) NPUExecutor(GetOMModelName(), npu_manager_);
279 if (executor_ == nullptr) {
280 MS_LOG(ERROR) << "Create NPUExecutor failed.";
281 return RET_ERROR;
282 }
283 return RET_OK;
284 }
285
Prepare()286 int NPUSubGraph::Prepare() {
287 if (executor_->Prepare() != RET_OK) {
288 MS_LOG(ERROR) << "NPU executor prepare failed.";
289 return RET_ERROR;
290 }
291 return RET_OK;
292 }
293 } // namespace mindspore
294