/**
 * Copyright 2021-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/litert/delegate/npu/npu_subgraph.h"
#include <set>
#include <unordered_map>
#include <utility>
#include "include/errorcode.h"
#include "include/graph/operator.h"
#include "include/graph/graph.h"
#include "include/graph/op/const_defs.h"
#include "include/graph/model.h"
#include "include/hiai_ir_build.h"
#include "src/common/utils.h"
#include "src/litert/delegate/npu/npu_converter_utils.h"
namespace mindspore::lite {
static std::set<mindspore::schema::PrimitiveType> npu_specific_weight_nodes = {
  schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_PadFusion,
  schema::PrimitiveType_BatchNorm,    schema::PrimitiveType_FullConnection,        schema::PrimitiveType_InstanceNorm,
  schema::PrimitiveType_TileFusion};

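// Release the ge::Operator objects created for this subgraph and the executor.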
NPUSubGraph::~NPUSubGraph() {
  subgraph_input_ops_.clear();
  subgraph_output_ops_.clear();
  out_tensor_sorted_.clear();
  all_tensors_from_out_ops_.clear();
  for (auto op : op_buffer_) {
    delete op;
  }
  if (executor_ != nullptr) {
    delete executor_;
  }
  op_buffer_.clear();
}

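// Replace the subgraph input tensor at `index` and propagate the new tensor to
// every input op that consumed the original tensor.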
void NPUSubGraph::set_input(mindspore::MSTensor in_tensor, int index) {
  MS_ASSERT(index < inputs_.size());
  auto origin_tensor = inputs_[index];
  // Only the input tensors of in_ops_ are used in the Execute function.
  for (auto op : in_ops_) {
    for (size_t i = 0; i < op->inputs().size(); i++) {
      if (op->inputs()[i] == origin_tensor) {
        op->set_input(in_tensor, i);
      }
    }
  }
  this->inputs_[index] = in_tensor;
}

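// Replace the subgraph output tensor at `index` and update every reference to the
// original tensor recorded from the output ops.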
void NPUSubGraph::set_output(mindspore::MSTensor out_tensor, int index) {
  MS_ASSERT(index < outputs_.size());
  auto origin_tensor = outputs_[index];
  for (size_t i = 0; i < all_tensors_from_out_ops_.size(); i++) {
    if (all_tensors_from_out_ops_[i] == origin_tensor) {
      all_tensors_from_out_ops_[i] = out_tensor;
    }
  }
  outputs_[index] = out_tensor;
}

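// Collect the ops that consume the subgraph inputs (in_ops_) and the ops that
// produce the subgraph outputs (out_ops_).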
int NPUSubGraph::GetGraphInOutOps() {
  for (const auto &in_tensor : this->inputs()) {
    for (auto op : npu_ops_) {
      if (find(op->inputs().begin(), op->inputs().end(), in_tensor) != op->inputs().end() &&
          find(in_ops_.begin(), in_ops_.end(), op) == in_ops_.end()) {
        in_ops_.push_back(op);
      }
    }
  }
  if (in_ops_.empty()) {
    MS_LOG(ERROR) << "Can't find the input ops for npu sub graph.";
    return RET_ERROR;
  }

  for (const auto &out_tensor : this->outputs()) {
    for (auto op : npu_ops_) {
      if (find(op->outputs().begin(), op->outputs().end(), out_tensor) != op->outputs().end() &&
          find(out_ops_.begin(), out_ops_.end(), op) == out_ops_.end()) {
        out_ops_.push_back(op);
      }
    }
  }
  if (out_ops_.empty()) {
    MS_LOG(ERROR) << "Can't find the output ops for npu sub graph.";
    return RET_ERROR;
  }
  return RET_OK;
}

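// Return the ops inside this subgraph whose outputs feed the inputs of cur_op.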
std::vector<NPUOp *> NPUSubGraph::FindPreOps(NPUOp *cur_op) {
  std::vector<NPUOp *> in_ops;
  for (const auto &in_tensor : cur_op->inputs()) {
    for (auto op : npu_ops_) {
      if (find(op->outputs().begin(), op->outputs().end(), in_tensor) != op->outputs().end()) {
        in_ops.push_back(op);
      }
    }
  }
  return in_ops;
}

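// Build the HiAI IR graph for this subgraph and compile it into an OM model buffer.
// A previously cached OM model is reused when the NPU manager can load one.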
std::shared_ptr<domi::ModelBufferData> NPUSubGraph::BuildIRModel() {
  ge::Graph graph("NPUGraph");

  auto ret = BuildNPUInputOp();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Build NPU input operator failed.";
    return nullptr;
  }
  ret = BuildNPUOutputOp();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Build NPU output operator failed.";
    return nullptr;
  }
  // Try to load a cached OM model first; build and store one if the cache misses.
  auto om_model_buffer = npu_manager_->LoadCache(GetOMModelName());
  if (om_model_buffer == nullptr) {
    // No OM cache found, build the IR model.
    graph.SetInputs(subgraph_input_ops_).SetOutputs(subgraph_output_ops_);
    ge::Model model(GetOMModelName(), mindspore::Version());
    model.SetGraph(graph);
    domi::HiaiIrBuild ir_build;
    om_model_buffer = std::make_shared<domi::ModelBufferData>();
    if (om_model_buffer == nullptr) {
      MS_LOG(ERROR) << "OM model buffer is nullptr.";
      return nullptr;
    }
    if (!ir_build.CreateModelBuff(model, *om_model_buffer)) {
      MS_LOG(ERROR) << "Create model buffer failed.";
      return nullptr;
    }
    if (!ir_build.BuildIRModel(model, *om_model_buffer)) {
      MS_LOG(ERROR) << "Build IR model failed.";
      ir_build.ReleaseModelBuff(*om_model_buffer);
      return nullptr;
    }
    if (npu_manager_->StoreCache(GetOMModelName(), om_model_buffer) != RET_OK) {
      MS_LOG(ERROR) << "Store Cache failed.";
      ir_build.ReleaseModelBuff(*om_model_buffer);
      return nullptr;
    }
  }
  return om_model_buffer;
}

int NPUSubGraph::Execute() { return executor_->Run(inputs(), outputs(), all_tensors_from_out_ops_, out_ops_); }

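// Create the HiAI input operators for every op in the subgraph. A tensor becomes a
// hiai::op::Data op if it is a subgraph input, is wired to the producing NPU op if it
// comes from inside the subgraph, or becomes a hiai::op::Const weight otherwise
// (unless the op type converts its own weights).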
int NPUSubGraph::BuildNPUInputOp() {
  int count = 0;
  subgraph_input_ops_.clear();
  op_buffer_.clear();
  for (auto op : this->npu_ops_) {
    std::vector<ge::Operator *> input_ops;
    std::unordered_map<int, std::pair<ge::Operator *, int>> index2_multi_out_index;
    for (int i = 0; i < op->inputs().size(); ++i) {
      auto in_tensor = op->inputs()[i];
      if (IsSubGraphInputTensor(in_tensor)) {
        auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
        hiai::op::Data *data = ConverterToNPUData(in_tensor, tensor_name);
        if (data == nullptr) {
          MS_LOG(ERROR) << "data is nullptr.";
          return RET_ERROR;
        }
        subgraph_input_ops_.push_back(*data);
        input_ops.push_back(data);
        op_buffer_.push_back(data);
        continue;
      }

      bool is_weight_tensor = true;
      auto pre_ops = FindPreOps(op);
      for (auto pre_op : pre_ops) {
        if (find(pre_op->outputs().begin(), pre_op->outputs().end(), in_tensor) != pre_op->outputs().end()) {
          // The input comes from another NPU op inside this subgraph.
          auto npu_op = reinterpret_cast<NPUOp *>(pre_op)->GetNPUOp();
          if (npu_op == nullptr) {
            MS_LOG(ERROR) << pre_op->name() << "'s npu operator is nullptr.";
            return RET_ERROR;
          }
          input_ops.push_back(npu_op);
          if (pre_op->outputs().size() != 1) {  // pre_op has multiple outputs; record which one is needed.
            int out_index =
              std::find(pre_op->outputs().begin(), pre_op->outputs().end(), in_tensor) - pre_op->outputs().begin();
            index2_multi_out_index[i] = {npu_op, out_index};
          }
          is_weight_tensor = false;
          break;
        }
      }

      // The tensor is a weight. Ops in npu_specific_weight_nodes convert their own weights,
      // so only create a Const input for the other op types.
      if (is_weight_tensor) {
        if (npu_specific_weight_nodes.find(op->type()) == npu_specific_weight_nodes.end()) {
          auto name = op->name() + "_const_input_" + std::to_string(i);
          auto weight_const = new (std::nothrow) hiai::op::Const(name);
          if (weight_const == nullptr) {
            MS_LOG(ERROR) << "New weight const " << name << " failed.";
            return RET_ERROR;
          }
          auto weight_tensor = ConverterToNPUTensor(in_tensor);
          weight_const->set_attr_value(weight_tensor);
          input_ops.push_back(weight_const);
          op_buffer_.push_back(weight_const);
        }
      }
    }
    // Set the collected input operators on the NPU op.
    int ret =
      reinterpret_cast<NPUOp *>(op)->SetNPUInputs(op->inputs(), op->outputs(), input_ops, index2_multi_out_index);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << op->name() << " set npu inputs failed.";
      return RET_ERROR;
    }
  }
  return RET_OK;
}

bool NPUSubGraph::IsSubGraphInputTensor(const mindspore::MSTensor &input) {
  if (find(this->inputs().begin(), this->inputs().end(), input) != this->inputs().end()) {
    return true;
  }
  return false;
}

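// Collect the underlying HiAI operators of the given ops into subgraph_output_ops_.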
int NPUSubGraph::GetNPUOperators(const std::vector<NPUOp *> &ops) {
  subgraph_output_ops_.reserve(ops.size());
  for (size_t i = 0; i < ops.size(); i++) {
    auto npu_op = reinterpret_cast<NPUOp *>(ops[i])->GetNPUOp();
    if (npu_op == nullptr) {
      MS_LOG(ERROR) << "Get NPU operator for " << ops[i]->name() << " failed.";
      return RET_ERROR;
    }
    subgraph_output_ops_.push_back(*npu_op);
  }
  return RET_OK;
}

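// Gather the HiAI operators of the output ops and record every tensor they produce.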
int NPUSubGraph::BuildNPUOutputOp() {
  subgraph_output_ops_.clear();
  auto ret = GetNPUOperators(out_ops_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Get NPU operators failed.";
    return RET_ERROR;
  }
  for (auto node : out_ops_) {
    for (const auto &tensor : node->outputs()) {
      all_tensors_from_out_ops_.emplace_back(tensor);
    }
  }
  if (subgraph_output_ops_.empty()) {
    MS_LOG(ERROR) << "NPU subgraph output op is empty.";
    return RET_ERROR;
  }
  return RET_OK;
}

std::string NPUSubGraph::GetOMModelName() { return this->name_ + ".om"; }

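// Hash the concatenated op names to build a name that identifies this subgraph's OM model.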
std::string NPUSubGraph::GetModelHash() {
  if (npu_ops_.empty()) {
    return "";
  }
  std::string npu_ops_name;
  for (auto &npu_op : npu_ops_) {
    npu_ops_name += npu_op->name();
  }
  std::hash<std::string> hash;
  auto hash_model = hash(npu_ops_name);
  return std::to_string(hash_model);
}

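// Find the boundary ops, name the subgraph, build (or load) the OM model, register it
// with the NPU manager, and create the executor for this subgraph.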
int NPUSubGraph::Init() {
  auto ret = GetGraphInOutOps();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Get NPU subgraph input and output ops failed.";
    return RET_ERROR;
  }
  auto model_hash = GetModelHash();
  if (!model_hash.empty()) {
    name_ = model_hash;
  } else {
    name_ = "kNpuSubGraph" + std::to_string(npu_manager_->SubGraphIndex());
  }
  auto model_buffer_data = BuildIRModel();
  if (model_buffer_data == nullptr) {
    MS_LOG(ERROR) << "Build IR model failed.";
    return RET_ERROR;
  }

  MS_ASSERT(npu_manager_ != nullptr);
  npu_manager_->AddModel(model_buffer_data, GetOMModelName(), npu_manager_->GetFrequency());

  executor_ = new (std::nothrow) NPUExecutor(GetOMModelName(), npu_manager_);
  if (executor_ == nullptr) {
    MS_LOG(ERROR) << "Create NPUExecutor failed.";
    return RET_ERROR;
  }
  executor_->InitInputMappingRelationShip(input_relationship_);
  return RET_OK;
}

int NPUSubGraph::Prepare() {
  if (executor_->Prepare() != RET_OK) {
    MS_LOG(ERROR) << "NPU executor prepare failed.";
    return RET_ERROR;
  }
  return RET_OK;
}
}  // namespace mindspore::lite