1 /**
2 * Copyright 2021-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/litert/delegate/npu/npu_subgraph.h"
18 #include <set>
19 #include <unordered_map>
20 #include <utility>
21 #include "include/errorcode.h"
22 #include "include/graph/operator.h"
23 #include "include/graph/graph.h"
24 #include "include/graph/op/const_defs.h"
25 #include "include/graph/model.h"
26 #include "include/hiai_ir_build.h"
27 #include "src/common/utils.h"
28 #include "src/litert/delegate/npu/npu_converter_utils.h"
29 namespace mindspore::lite {
// Op types whose constant (weight) inputs must NOT be wrapped in a generic
// hiai::op::Const in BuildNPUInputOp — presumably these ops convert their own
// weights inside their dedicated converters (TODO confirm against op impls).
static std::set<mindspore::schema::PrimitiveType> npu_specific_weight_nodes = {
  schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_PadFusion,
  schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm,
  schema::PrimitiveType_TileFusion};
34
~NPUSubGraph()35 NPUSubGraph::~NPUSubGraph() {
36 subgraph_input_ops_.clear();
37 subgraph_output_ops_.clear();
38 out_tensor_sorted_.clear();
39 all_tensors_from_out_ops_.clear();
40 for (auto op : op_buffer_) {
41 delete op;
42 }
43 if (executor_ != nullptr) {
44 delete executor_;
45 }
46 op_buffer_.clear();
47 }
48
set_input(mindspore::MSTensor in_tensor,int index)49 void NPUSubGraph::set_input(mindspore::MSTensor in_tensor, int index) {
50 MS_ASSERT(index < inputs_.size());
51 auto origin_tensor = inputs_[index];
52 // only in_ops_ input tensors list used in execute function
53 for (auto op : in_ops_) {
54 for (size_t i = 0; i < op->inputs().size(); i++) {
55 if (op->inputs()[i] == origin_tensor) {
56 op->set_input(in_tensor, i);
57 }
58 }
59 }
60 this->inputs_[index] = in_tensor;
61 }
62
set_output(mindspore::MSTensor out_tensor,int index)63 void NPUSubGraph::set_output(mindspore::MSTensor out_tensor, int index) {
64 MS_ASSERT(index < outputs_.size());
65 auto origin_tensor = outputs_[index];
66 for (size_t i = 0; i < all_tensors_from_out_ops_.size(); i++) {
67 if (all_tensors_from_out_ops_[i] == origin_tensor) {
68 all_tensors_from_out_ops_[i] = out_tensor;
69 }
70 }
71 outputs_[index] = out_tensor;
72 }
73
GetGraphInOutOps()74 int NPUSubGraph::GetGraphInOutOps() {
75 for (const auto &in_tensor : this->inputs()) {
76 for (auto op : npu_ops_) {
77 if (find(op->inputs().begin(), op->inputs().end(), in_tensor) != op->inputs().end() &&
78 find(in_ops_.begin(), in_ops_.end(), op) == in_ops_.end()) {
79 in_ops_.push_back(op);
80 }
81 }
82 }
83 if (in_ops_.empty()) {
84 MS_LOG(ERROR) << "Can't find the input ops for npu sub graph.";
85 return RET_ERROR;
86 }
87
88 for (const auto &out_tensor : this->outputs()) {
89 for (auto op : npu_ops_) {
90 if (find(op->outputs().begin(), op->outputs().end(), out_tensor) != op->outputs().end() &&
91 find(out_ops_.begin(), out_ops_.end(), op) == out_ops_.end()) {
92 out_ops_.push_back(op);
93 }
94 }
95 }
96 if (out_ops_.empty()) {
97 MS_LOG(ERROR) << "Can't find the output ops for npu sub graph.";
98 return RET_ERROR;
99 }
100 return RET_OK;
101 }
102
FindPreOps(NPUOp * cur_op)103 std::vector<NPUOp *> NPUSubGraph::FindPreOps(NPUOp *cur_op) {
104 std::vector<NPUOp *> in_ops;
105 for (const auto &in_tensor : cur_op->inputs()) {
106 for (auto op : npu_ops_) {
107 if (find(op->outputs().begin(), op->outputs().end(), in_tensor) != op->outputs().end()) {
108 in_ops.push_back(op);
109 }
110 }
111 }
112 return in_ops;
113 }
114
BuildIRModel()115 std::shared_ptr<domi::ModelBufferData> NPUSubGraph::BuildIRModel() {
116 ge::Graph graph("NPUGraph");
117
118 auto ret = BuildNPUInputOp();
119 if (ret != RET_OK) {
120 MS_LOG(ERROR) << "Build NPU input operator failed.";
121 return nullptr;
122 }
123 ret = BuildNPUOutputOp();
124 if (ret != RET_OK) {
125 MS_LOG(ERROR) << "Build NPU output operator failed.";
126 return nullptr;
127 }
128 // For om Cache, StoreCache
129 auto om_model_buffer = npu_manager_->LoadCache(GetOMModelName());
130 if (om_model_buffer == nullptr) {
131 // No om Cache, build IR model.
132 graph.SetInputs(subgraph_input_ops_).SetOutputs(subgraph_output_ops_);
133 ge::Model model(GetOMModelName(), mindspore::Version());
134 model.SetGraph(graph);
135 domi::HiaiIrBuild ir_build;
136 om_model_buffer = std::make_shared<domi::ModelBufferData>();
137 if (om_model_buffer == nullptr) {
138 MS_LOG(ERROR) << "OM model buffer is nullptr.";
139 return nullptr;
140 }
141 if (!ir_build.CreateModelBuff(model, *om_model_buffer)) {
142 MS_LOG(ERROR) << "Create model buffer failed.";
143 return nullptr;
144 }
145 if (!ir_build.BuildIRModel(model, *om_model_buffer)) {
146 MS_LOG(ERROR) << "Build IR model failed.";
147 ir_build.ReleaseModelBuff(*om_model_buffer);
148 return nullptr;
149 }
150 if (npu_manager_->StoreCache(GetOMModelName(), om_model_buffer) != RET_OK) {
151 MS_LOG(ERROR) << "Store Cache failed.";
152 ir_build.ReleaseModelBuff(*om_model_buffer);
153 return nullptr;
154 }
155 }
156 return om_model_buffer;
157 }
158
Execute()159 int NPUSubGraph::Execute() { return executor_->Run(inputs(), outputs(), all_tensors_from_out_ops_, out_ops_); }
160
BuildNPUInputOp()161 int NPUSubGraph::BuildNPUInputOp() {
162 int count = 0;
163 subgraph_input_ops_.clear();
164 op_buffer_.clear();
165 for (auto op : this->npu_ops_) {
166 std::vector<ge::Operator *> input_ops;
167 std::unordered_map<int, std::pair<ge::Operator *, int>> index2_multi_out_index;
168 for (int i = 0; i < op->inputs().size(); ++i) {
169 auto in_tensor = op->inputs()[i];
170 if (IsSubGraphInputTensor(in_tensor)) {
171 auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
172 hiai::op::Data *data = ConverterToNPUData(in_tensor, tensor_name);
173 if(data == nullptr){
174 MS_LOG(ERROR) << "data is nullptr.";
175 return RET_ERROR;
176 }
177 subgraph_input_ops_.push_back(*data);
178 input_ops.push_back(data);
179 op_buffer_.push_back(data);
180 continue;
181 }
182
183 bool is_weight_tensor = true;
184 auto pre_ops = FindPreOps(op);
185 for (auto pre_op : pre_ops) {
186 if (find(pre_op->outputs().begin(), pre_op->outputs().end(), in_tensor) != pre_op->outputs().end()) {
187 // input come from npu
188 auto npu_op = reinterpret_cast<NPUOp *>(pre_op)->GetNPUOp();
189 if (npu_op == nullptr) {
190 MS_LOG(ERROR) << pre_op->name() << "'s npu operator is nullptr.";
191 return RET_ERROR;
192 }
193 input_ops.push_back(npu_op);
194 if (pre_op->outputs().size() != 1) { // in_op has multi output, we record which output we want.
195 int out_index =
196 std::find(pre_op->outputs().begin(), pre_op->outputs().end(), in_tensor) - pre_op->outputs().begin();
197 index2_multi_out_index[i] = {npu_op, out_index};
198 }
199 is_weight_tensor = false;
200 break;
201 }
202 }
203
204 // weight tensor
205 if (is_weight_tensor) {
206 if (npu_specific_weight_nodes.find(op->type()) == npu_specific_weight_nodes.end()) {
207 auto name = op->name() + "_const_input_" + std::to_string(i);
208 auto weight_const = new (std::nothrow) hiai::op::Const(name);
209 if (weight_const == nullptr) {
210 MS_LOG(ERROR) << "New weight const " << name << " failed.";
211 return RET_ERROR;
212 }
213 auto weight_tensor = ConverterToNPUTensor(in_tensor);
214 weight_const->set_attr_value(weight_tensor);
215 input_ops.push_back(weight_const);
216 op_buffer_.push_back(weight_const);
217 }
218 }
219 }
220 // set input to NPU
221 int ret =
222 reinterpret_cast<NPUOp *>(op)->SetNPUInputs(op->inputs(), op->outputs(), input_ops, index2_multi_out_index);
223 if (ret != RET_OK) {
224 MS_LOG(ERROR) << op->name() << " set npu inputs failed.";
225 return RET_ERROR;
226 }
227 }
228 return RET_OK;
229 }
230
IsSubGraphInputTensor(const mindspore::MSTensor & input)231 bool NPUSubGraph::IsSubGraphInputTensor(const mindspore::MSTensor &input) {
232 if (find(this->inputs().begin(), this->inputs().end(), input) != this->inputs().end()) {
233 return true;
234 }
235 return false;
236 }
237
GetNPUOperators(const std::vector<NPUOp * > & ops)238 int NPUSubGraph::GetNPUOperators(const std::vector<NPUOp *> &ops) {
239 subgraph_output_ops_.reserve(ops.size());
240 for (int i = 0; i < ops.size(); i++) {
241 auto npu_op = reinterpret_cast<NPUOp *>(ops[i])->GetNPUOp();
242 if (npu_op == nullptr) {
243 MS_LOG(ERROR) << "Get NPU operator for " << ops[i]->name() << " failed.";
244 return RET_ERROR;
245 }
246 subgraph_output_ops_.push_back(*npu_op);
247 }
248 return RET_OK;
249 }
250
BuildNPUOutputOp()251 int NPUSubGraph::BuildNPUOutputOp() {
252 subgraph_output_ops_.clear();
253 auto ret = GetNPUOperators(out_ops_);
254 if (ret != RET_OK) {
255 MS_LOG(ERROR) << "Get NPU operators failed.";
256 return RET_ERROR;
257 }
258 for (auto node : out_ops_) {
259 for (const auto &tensor : node->outputs()) {
260 all_tensors_from_out_ops_.emplace_back(tensor);
261 }
262 }
263 if (subgraph_output_ops_.empty()) {
264 MS_LOG(ERROR) << "NPU subgraph output op is empty.";
265 return RET_ERROR;
266 }
267 return RET_OK;
268 }
269
GetOMModelName()270 std::string NPUSubGraph::GetOMModelName() { return this->name_ + ".om"; }
271
GetModelHash()272 std::string NPUSubGraph::GetModelHash() {
273 if (npu_ops_.empty()) {
274 return "";
275 }
276 std::string npu_ops_name;
277 for (auto &npu_op : npu_ops_) {
278 npu_ops_name += npu_op->name();
279 }
280 std::hash<std::string> hash;
281 auto hash_model = hash(npu_ops_name);
282 return std::to_string(hash_model);
283 }
284
Init()285 int NPUSubGraph::Init() {
286 auto ret = GetGraphInOutOps();
287 if (ret != RET_OK) {
288 MS_LOG(ERROR) << "Get NPU subgraph input and output ops failed.";
289 return RET_ERROR;
290 }
291 auto model_hash = GetModelHash();
292 if (!model_hash.empty()) {
293 name_ = model_hash;
294 } else {
295 name_ = "kNpuSubGraph" + std::to_string(npu_manager_->SubGraphIndex());
296 }
297 auto model_buffer_data = BuildIRModel();
298 if (model_buffer_data == nullptr) {
299 MS_LOG(ERROR) << "Build IR model failed.";
300 return RET_ERROR;
301 }
302
303 MS_ASSERT(npu_manager_ != nullptr);
304 npu_manager_->AddModel(model_buffer_data, GetOMModelName(), npu_manager_->GetFrequency());
305
306 executor_ = new (std::nothrow) NPUExecutor(GetOMModelName(), npu_manager_);
307 if (executor_ == nullptr) {
308 MS_LOG(ERROR) << "Create NPUExecutor failed.";
309 return RET_ERROR;
310 }
311 executor_->InitInputMappingRelationShip(input_relationship_);
312 return RET_OK;
313 }
314
Prepare()315 int NPUSubGraph::Prepare() {
316 if (executor_->Prepare() != RET_OK) {
317 MS_LOG(ERROR) << "NPU executor prepare failed.";
318 return RET_ERROR;
319 }
320 return RET_OK;
321 }
322 } // namespace mindspore::lite
323