From aa38d5a95960e60f6a90a1ffa2958a6ebcee2b4e Mon Sep 17 00:00:00 2001
From: chengfeng27 <chengfeng27@huawei.com>
Date: Thu, 18 Apr 2024 09:39:33 +0800
Subject: [PATCH] nnrt litegraph dequant

---
 mindspore/lite/mindir/include/mindir_tensor.h |   6 +-
 mindspore/lite/mindir/include/mindir_types.h  |  28 ++-
 mindspore/lite/mindir/inner_headers/utils.h   |   2 +-
 mindspore/lite/mindir/src/mindir.cc           |  93 ++++++++++
 mindspore/lite/mindir/src/mindir_tensor.cc    |  14 +-
 mindspore/lite/mindir/src/utils.cc            |  27 +--
 .../src/litert/delegate/nnrt/nnrt_delegate.cc | 166 ++++++++++++------
 .../src/litert/delegate/nnrt/nnrt_delegate.h  |  12 +-
 mindspore/lite/src/litert/scheduler.cc        |   1 +
 9 files changed, 270 insertions(+), 79 deletions(-)

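Note (commentary below the diffstat, so not applied by `git am`): MindIR_Tensor_Create and
ConvertQuantParams now take C-style pointer + size pairs instead of std::string / std::vector
references. A minimal caller-side sketch of the new signature, assuming the caller still holds
the original containers; the variable names and values below are illustrative only, not taken
from this patch:

    // Assumes the MindIR headers from mindspore/lite/mindir/include are available.
    std::string name = "weight";
    std::vector<int32_t> dims = {1, 16, 3, 3};
    std::vector<uint8_t> data(1 * 16 * 3 * 3);  // raw tensor bytes
    std::vector<QuantParam> quant_params(1);    // scale/zeroPoint/numBits as used in utils.cc
    DataType data_type{};                       // pick the appropriate DataType member from mindir_types.h
    TensorPtr tensor = MindIR_Tensor_Create(
        name.c_str(), data_type, dims.data(), static_cast<uint32_t>(dims.size()),
        FORMAT_NHWC, data.data(), static_cast<uint32_t>(data.size()),
        quant_params.data(), static_cast<uint32_t>(quant_params.size()));
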
diff --git a/mindspore/lite/mindir/include/mindir_tensor.h b/mindspore/lite/mindir/include/mindir_tensor.h
index c1ac89bf..43c1478c 100644
--- a/mindspore/lite/mindir/include/mindir_tensor.h
+++ b/mindspore/lite/mindir/include/mindir_tensor.h
@@ -8,9 +8,9 @@ namespace lite {

 // ********** Tensor **********
 TensorPtr MindIR_Tensor_Create();
-TensorPtr MindIR_Tensor_Create(const std::string &name, DataType data_type, const std::vector<int32_t> &dims,
-                               Format format, const std::vector<uint8_t> &data,
-                               const std::vector<QuantParam> &quant_params);
+TensorPtr MindIR_Tensor_Create(const char *name, DataType data_type, const int32_t *dims, uint32_t dims_size,
+                               Format format, const uint8_t *data, uint32_t data_size,
+                               const QuantParam *quant_params, uint32_t quant_params_size);
 std::string MindIR_Tensor_GetName(ConstTensorPtr tensor);
 void MindIR_Tensor_SetName(TensorPtr *tensor, const std::string &name);
 DataType MindIR_Tensor_GetDataType(ConstTensorPtr tensor);
diff --git a/mindspore/lite/mindir/include/mindir_types.h b/mindspore/lite/mindir/include/mindir_types.h
index 5744441a..196995fa 100644
--- a/mindspore/lite/mindir/include/mindir_types.h
+++ b/mindspore/lite/mindir/include/mindir_types.h
@@ -44,11 +44,35 @@ enum DataType : int8_t {
 enum Format : int8_t {
   FORMAT_NCHW = 0,
   FORMAT_NHWC = 1,
+  FORMAT_NHWC4 = 2,
+  FORMAT_HWKC = 3,
+  FORMAT_HWCK = 4,
+  FORMAT_KCHW = 5,
+  FORMAT_CKHW = 6,
+  FORMAT_KHWC = 7,
+  FORMAT_CHWK = 8,
+  FORMAT_HW = 9,
+  FORMAT_HW4 = 10,
+  FORMAT_NC = 11,
+  FORMAT_NC4 = 12,
+  FORMAT_NC4HW4 = 13,
+  FORMAT_NUM_OF_FORMAT = 14,
+  FORMAT_NCDHW = 15,
+  FORMAT_NWC = 16,
+  FORMAT_NCW = 17,
+  FORMAT_NC8HW8 = 18,
+  FORMAT_MIN = FORMAT_NCHW,
+  FORMAT_MAX = FORMAT_NC8HW8
 };

 enum QuantType : int8_t {
-  QUANT_TYPE_NONE,
-  QUANT_TYPE_ALL,
+  QUANT_TYPE_NONE = 0,
+  QUANT_TYPE_AWARETRAINING = 1,
+  QUANT_TYPE_WEIGHTQUANT = 2,
+  QUANT_TYPE_POSTTRAINING = 3,
+  QUANT_TYPE_WEIGHT = 4,
+  QUANT_TYPE_ALL = 5,
+  QUANT_TYPE_DYNAMIC = 6
 };

 enum NodeType : uint32_t {
diff --git a/mindspore/lite/mindir/inner_headers/utils.h b/mindspore/lite/mindir/inner_headers/utils.h
index 0e6eb35d..0d150f80 100644
--- a/mindspore/lite/mindir/inner_headers/utils.h
+++ b/mindspore/lite/mindir/inner_headers/utils.h
@@ -17,7 +17,7 @@ flatbuffers::Offset<schema::Vec2D> CreateVec2D(flatbuffers::FlatBufferBuilder &f
 mindspore::schema::PrimitiveType MindIR_GetPrimitiveType(PrimitivePtr prim);

 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ConvertQuantParams(
-  flatbuffers::FlatBufferBuilder &fbb, const std::vector<QuantParam> &quant_params);
+  flatbuffers::FlatBufferBuilder &fbb, const QuantParam *quant_params, uint32_t quant_params_size);

 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ConvertQuantParams(
   flatbuffers::FlatBufferBuilder &fbb,
diff --git a/mindspore/lite/mindir/src/mindir.cc b/mindspore/lite/mindir/src/mindir.cc
index 7041498a..a1f86671 100644
--- a/mindspore/lite/mindir/src/mindir.cc
+++ b/mindspore/lite/mindir/src/mindir.cc
@@ -398,6 +398,9 @@ std::vector<int64_t> MindIR_AvgPoolFusion_GetKernelSize(ConstPrimitivePtr primit
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->kernel_size();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -437,6 +440,9 @@ std::vector<int64_t> MindIR_AvgPoolFusion_GetStrides(ConstPrimitivePtr primitive
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->strides();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -476,6 +482,9 @@ std::vector<int64_t> MindIR_AvgPoolFusion_GetPad(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->pad();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -712,6 +721,9 @@ std::vector<int64_t> MindIR_BatchToSpaceND_GetBlockShape(ConstPrimitivePtr primi
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->block_shape();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -747,6 +759,9 @@ std::vector<std::vector<int64_t>> MindIR_BatchToSpaceND_GetCrops(ConstPrimitiveP
     if (prim != nullptr && value != nullptr) {
       std::vector<std::vector<int64_t>> out;
       auto src = value->crops();
+      if (src == nullptr) {
+        return {};
+      }
       for (auto sub_list : *src->data()) {
         std::vector<int64_t> result_tmp;
         result_tmp.resize(sub_list->data()->size());
@@ -871,6 +886,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetKernelSize(ConstPrimitivePtr primiti
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->kernel_size();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -911,6 +929,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetStride(ConstPrimitivePtr primitive) 
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->stride();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -952,6 +973,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetDilation(ConstPrimitivePtr primitive
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->dilation();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -1030,6 +1054,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetPadList(ConstPrimitivePtr primitive)
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->pad_list();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -1281,6 +1308,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetKernelSize(ConstPrimitivePt
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->kernel_size();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -1322,6 +1352,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetStride(ConstPrimitivePtr pr
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->stride();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -1364,6 +1397,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetDilation(ConstPrimitivePtr 
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->dilation();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -1444,6 +1480,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetPadList(ConstPrimitivePtr p
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->pad_list();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -1640,6 +1679,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetOutputPaddings(ConstPrimiti
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->output_paddings();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -2273,6 +2315,9 @@ std::vector<int64_t> MindIR_MaxPoolFusion_GetKernelSize(ConstPrimitivePtr primit
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->kernel_size();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -2312,6 +2357,9 @@ std::vector<int64_t> MindIR_MaxPoolFusion_GetStrides(ConstPrimitivePtr primitive
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->strides();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -2351,6 +2399,9 @@ std::vector<int64_t> MindIR_MaxPoolFusion_GetPad(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->pad();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -2680,6 +2731,9 @@ std::vector<std::vector<int64_t>> MindIR_PadFusion_GetPaddings(ConstPrimitivePtr
     if (prim != nullptr && value != nullptr) {
       std::vector<std::vector<int64_t>> out;
       auto src = value->paddings();
+      if (src == nullptr) {
+        return {};
+      }
       for (auto sub_list : *src->data()) {
         std::vector<int64_t> result_tmp;
         result_tmp.resize(sub_list->data()->size());
@@ -3601,6 +3655,9 @@ std::vector<int64_t> MindIR_SliceFusion_GetAxes(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->axes();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -3646,6 +3703,9 @@ std::vector<int64_t> MindIR_Softmax_GetAxis(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->axis();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -3694,6 +3754,9 @@ std::vector<int64_t> MindIR_SpaceToBatchND_GetBlockShape(ConstPrimitivePtr primi
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->block_shape();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -3729,6 +3792,9 @@ std::vector<std::vector<int64_t>> MindIR_SpaceToBatchND_GetPaddings(ConstPrimiti
     if (prim != nullptr && value != nullptr) {
       std::vector<std::vector<int64_t>> out;
       auto src = value->paddings();
+      if (src == nullptr) {
+        return {};
+      }
       for (auto sub_list : *src->data()) {
         std::vector<int64_t> result_tmp;
         result_tmp.resize(sub_list->data()->size());
@@ -3812,6 +3878,9 @@ std::vector<int64_t> MindIR_Split_GetSizeSplits(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->size_splits();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -3912,6 +3981,9 @@ std::vector<int64_t> MindIR_Squeeze_GetAxis(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->axis();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -4212,6 +4284,9 @@ std::vector<int64_t> MindIR_TileFusion_GetDims(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->dims();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -4342,6 +4417,9 @@ std::vector<int64_t> MindIR_Unsqueeze_GetAxis(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->axis();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -4399,6 +4477,9 @@ std::vector<int64_t> MindIR_BroadcastTo_GetShape(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->shape();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -4477,6 +4558,9 @@ std::vector<float> MindIR_ConstantOfShape_GetValue(ConstPrimitivePtr primitive) 
     if (prim != nullptr && value_ != nullptr) {
       std::vector<float> result;
       auto src = value_->value();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](float item) { return item; });
       return result;
@@ -5889,6 +5973,9 @@ std::vector<int64_t> MindIR_L2NormalizeFusion_GetAxis(ConstPrimitivePtr primitiv
     if (prim != nullptr && value != nullptr) {
      std::vector<int64_t> result;
       auto src = value->axis();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -6238,6 +6325,9 @@ std::vector<int64_t> MindIR_Crop_GetOffsets(ConstPrimitivePtr primitive) {
     if (prim != nullptr && value != nullptr) {
       std::vector<int64_t> result;
       auto src = value->offsets();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; });
       return result;
@@ -6348,6 +6438,9 @@ std::vector<float> MindIR_DetectionPostProcess_GetScale(ConstPrimitivePtr primit
     if (prim != nullptr && value != nullptr) {
       std::vector<float> result;
       auto src = value->scale();
+      if (src == nullptr) {
+        return {};
+      }
       result.resize(src->size());
       std::transform(src->begin(), src->end(), result.begin(), [](float item) { return item; });
       return result;
diff --git a/mindspore/lite/mindir/src/mindir_tensor.cc b/mindspore/lite/mindir/src/mindir_tensor.cc
index 9575f8c2..8888e2c9 100644
--- a/mindspore/lite/mindir/src/mindir_tensor.cc
+++ b/mindspore/lite/mindir/src/mindir_tensor.cc
@@ -36,15 +36,15 @@ TensorPtr MindIR_Tensor_Create() {
   return ret_value;
 }

-TensorPtr MindIR_Tensor_Create(const std::string &name, DataType data_type, const std::vector<int32_t> &dims,
-                               Format format, const std::vector<uint8_t> &data,
-                               const std::vector<QuantParam> &quant_params) {
+TensorPtr MindIR_Tensor_Create(const char *name, DataType data_type, const int32_t *dims, uint32_t dims_size,
+                               Format format, const uint8_t *data, uint32_t data_size,
+                               const QuantParam *quant_params, uint32_t quant_params_size) {
   flatbuffers::FlatBufferBuilder fbb;

   auto ops_offset =
-    schema::CreateTensor(fbb, 0, data_type, fbb.CreateVector(dims.data(), dims.size()),
-                         static_cast<schema::Format>(format), 0, 0, fbb.CreateVector(data.data(), data.size()),
-                         ConvertQuantParams(fbb, quant_params), 0, fbb.CreateString(name.c_str(), name.size()));
+    schema::CreateTensor(fbb, 0, data_type, fbb.CreateVector(dims, dims_size),
+                         static_cast<schema::Format>(format), 0, 0, fbb.CreateVector(data, data_size),
+                         ConvertQuantParams(fbb, quant_params, quant_params_size), 0, fbb.CreateString(name, strlen(name)));
   fbb.Finish(ops_offset);
   auto new_addr = MindIRMemoryManager::GetInstance()->CreateTensorFromBuilder(fbb, nullptr);
   auto ret_value = flatbuffers::GetMutableRoot<schema::Tensor>(new_addr);
@@ -332,7 +332,7 @@ void MindIR_Tensor_SetQuantParams(TensorPtr *tensor, const std::vector<QuantPara
       }
       auto ops_offset =
         schema::CreateTensor(fbb, 0, value->dataType(), dims, static_cast<schema::Format>(value->format()), 0, 0, data,
-                             ConvertQuantParams(fbb, quant_params), 0, name);
+                             ConvertQuantParams(fbb, quant_params.data(), quant_params.size()), 0, name);
       fbb.Finish(ops_offset);
       auto new_addr = MindIRMemoryManager::GetInstance()->CreateTensorFromBuilder(fbb, value);
       auto ret_value = flatbuffers::GetMutableRoot<schema::Primitive>(new_addr);
diff --git a/mindspore/lite/mindir/src/utils.cc b/mindspore/lite/mindir/src/utils.cc
index b044f414..870802a9 100644
--- a/mindspore/lite/mindir/src/utils.cc
+++ b/mindspore/lite/mindir/src/utils.cc
@@ -63,21 +63,24 @@ flatbuffers::Offset<schema::Vec2D> CreateVec2D(flatbuffers::FlatBufferBuilder &f
 }
 flatbuffers::Offset<schema::Vec2D> CreateVec2D(flatbuffers::FlatBufferBuilder &fbb,
                                                const mindspore::schema::Vec2D *data) {
-  auto data_inner = data->data();
   std::vector<flatbuffers::Offset<schema::Vec>> vet2d;
-  vet2d.reserve(data_inner->size());
-  for (const auto data_one : *data_inner) {
-    vet2d.emplace_back(schema::CreateVec(fbb, fbb.CreateVector(data_one->data()->data(), data_one->data()->size())));
+  if (data != nullptr) {
+    auto data_inner = data->data();
+    vet2d.reserve(data_inner->size());
+    for (const auto data_one : *data_inner) {
+      vet2d.emplace_back(schema::CreateVec(fbb, fbb.CreateVector(data_one->data()->data(), data_one->data()->size())));
+    }
   }
   flatbuffers::Offset<schema::Vec2D> v2d = schema::CreateVec2D(fbb, fbb.CreateVector(vet2d));
   return v2d;
 }

 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ConvertQuantParams(
-  flatbuffers::FlatBufferBuilder &fbb, const std::vector<QuantParam> &quant_params) {
+  flatbuffers::FlatBufferBuilder &fbb, const QuantParam *quant_params, uint32_t quant_params_size) {
   std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> tmp_vec;
-  tmp_vec.reserve(quant_params.size());
-  for (auto q_param : quant_params) {
+  tmp_vec.reserve(quant_params_size);
+  for (uint32_t i = 0; i < quant_params_size; i++) {
+    QuantParam q_param = quant_params[i];
     tmp_vec.emplace_back(schema::CreateQuantParam(fbb, q_param.scale, q_param.zeroPoint, 0, 0, true, q_param.numBits));
   }
   flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ret_quant_param =
@@ -89,10 +92,12 @@ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>
   flatbuffers::FlatBufferBuilder &fbb,
   const flatbuffers::Vector<flatbuffers::Offset<mindspore::schema::QuantParam>> *quant_params) {
   std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> tmp_vec;
-  tmp_vec.reserve(quant_params->size());
-  for (auto q_param : *quant_params) {
-    tmp_vec.emplace_back(
-      schema::CreateQuantParam(fbb, q_param->scale(), q_param->zeroPoint(), 0, 0, true, q_param->numBits()));
+  if (quant_params != nullptr && quant_params->size() != 0) {
+    tmp_vec.reserve(quant_params->size());
+    for (auto q_param : *quant_params) {
+      tmp_vec.emplace_back(
+        schema::CreateQuantParam(fbb, q_param->scale(), q_param->zeroPoint(), 0, 0, true, q_param->numBits()));
+    }
   }
   flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ret_quant_param =
     fbb.CreateVector(tmp_vec.data(), tmp_vec.size());
diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
index ca195af4..d8450141 100644
--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
@@ -52,6 +52,12 @@ void NNRTDelegate::InitCachePath() {
 }

 Status NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) {
+  // dequant litegraph
+  auto ret_dequant = DequantLiteGraph(lite_graph_);
+  if (ret_dequant != kSuccess) {
+    MS_LOG(ERROR) << "Dequant litegraph failed.";
+    return kLiteError;
+  }
 #ifdef SUPPORT_NNRT_METAGRAPH
   if (IsKirinNPU()) {
     MS_LOG(DEBUG) << "Choose to build nnrt model with Metagraph";
@@ -121,22 +127,11 @@ Status NNRTDelegate::BuildKirinNPUModel(DelegateModel<schema::Primitive> *model)
     MS_LOG_DEBUG << "set extension, item name: " << dst_extension.name << ", value size: " << dst_extension.valueSize;
   }

-  if (IsCustomModel()) {
-    auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_);
-    if (ret != OH_NN_SUCCESS) {
-      MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
-      OH_NNModel_Destroy(&nn_model);
-      return kLiteError;
-    }
-  } else {
-    SetKirinModelInputsAndOutputs(nn_model);
-    auto ret = OH_NNModel_BuildFromMetaGraph(nn_model, meta_graph_, extensions.data(), extensions.size());
-    FreeLiteGraph(&lite_graph_);
-    if (ret != OH_NN_SUCCESS) {
-      MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
-      OH_NNModel_Destroy(&nn_model);
-      return kLiteError;
-    }
+  auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, extensions.data(), extensions.size());
+  if (ret != OH_NN_SUCCESS) {
+    MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
+    OH_NNModel_Destroy(&nn_model);
+    return kLiteError;
   }

   auto ret2 =  CreateFullModelKernel(model, nn_model);
@@ -147,36 +142,6 @@ Status NNRTDelegate::BuildKirinNPUModel(DelegateModel<schema::Primitive> *model)
   return kSuccess;
 }

-std::vector<OH_NN_TensorInfo> NNRTDelegate::CreateNNTensorInfos(const std::vector<uint32_t> &indices) const {
-  std::vector<OH_NN_TensorInfo> nn_tensor_infos;
-  for (auto index: indices) {
-    auto tensor = lite_graph_->all_tensors_[index];
-    auto shape = tensor->dims();
-    auto data_type = tensor->dataType();
-    auto name = tensor->name();
-    auto format = tensor->format();
-
-    OH_NN_TensorInfo info;
-    info.dataType = CastToNNRTDataType(static_cast<mindspore::DataType>(data_type));
-    info.dimensions = shape->data();
-    info.dimensionCount = shape->size();
-    strcpy(info.name, name->c_str());
-    info.format = CastToNNRTFormat(static_cast<Format>(format));
-    nn_tensor_infos.push_back(info);
-  }
-  return nn_tensor_infos;
-}
-
-Status NNRTDelegate::SetKirinModelInputsAndOutputs(OH_NNModel *nn_model) {
-  std::vector<OH_NN_TensorInfo> inputInfos;
-  std::vector<OH_NN_TensorInfo> outputInfos;
-  auto input_infos = CreateNNTensorInfos(lite_graph_->input_indices_);
-  auto output_infos = CreateNNTensorInfos(lite_graph_->output_indices_);
-  OH_NNModel_SetInputsAndOutputsInfo(nn_model, input_infos.data(), input_infos.size(), output_infos.data(),
-                                     output_infos.size());
-  return kSuccess;
-}
-
 Status NNRTDelegate::CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model) {
   OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model);
   if (nn_compilation == nullptr) {
@@ -277,7 +242,7 @@ OH_NNModel *NNRTDelegate::CreateFullNNModel() {
     return nullptr;
   }

-  auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_);
+  auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, nullptr, 0);
   if (ret != OH_NN_SUCCESS) {
     MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
     OH_NNModel_Destroy(&nn_model);
@@ -531,7 +496,7 @@ Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive> 
     auto sub_lite_graph = sub_lite_graphs[i];

     OH_NNModel *nn_model = OH_NNModel_Construct();
-    auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph);
+    auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph, nullptr, 0);
     if (ret != OH_NN_SUCCESS) {
       MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
       OH_NNModel_Destroy(&nn_model);
@@ -735,10 +700,6 @@ OH_NN_DataType NNRTDelegate::CastToNNRTDataType(DataType data_type) {
   return iter->second;
 }

-OH_NN_Format NNRTDelegate::CastToNNRTFormat(Format format) {
-  return OH_NN_FORMAT_NHWC;
-}
-
 Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model,
                                     OH_NNExecutor *oh_nn_executor) {
   auto output_tensors = model->outputs();
@@ -754,6 +715,103 @@ Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model,
   return kSuccess;
 }

+schema::Tensor *NNRTDelegate::TensorToSchemaTensor(Tensor *lite_tensor, schema::Tensor *schema_tensor) {
+  flatbuffers::FlatBufferBuilder fbb(1024);
+  auto shape = lite_tensor->shape();
+  std::vector<int32_t> dim_vec(shape.begin(), shape.end());
+
+  auto quant_params = lite_tensor->quant_params();
+  std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> quant_vec;
+  quant_vec.reserve(quant_params.size());
+  for (auto q_param : quant_params) {
+    quant_vec.emplace_back(schema::CreateQuantParam(fbb, q_param.scale, q_param.zeroPoint, 0, 0, true, q_param.bitNum));
+  }
+  auto quant_clusters = lite_tensor->quant_clusters();
+
+  auto external_data = schema_tensor->externalData();
+  std::vector<flatbuffers::Offset<mindspore::schema::ExternalData>> external_data_vec;
+  if (external_data != nullptr) {
+    for (auto ed : *external_data) {
+      external_data_vec.emplace_back(schema::CreateExternalDataDirect(fbb, ed->checkSum()->c_str(), ed->location()->c_str(), 0, ed->length()));
+    }
+  }
+  uint8_t *data_src = reinterpret_cast<uint8_t *>(lite_tensor->data());
+  std::vector<uint8_t> data_vec(data_src, data_src + lite_tensor->Size());
+  auto tensor_offset = schema::CreateTensorDirect(fbb, schema_tensor->nodeType(), lite_tensor->data_type(), &dim_vec,
+                                                  schema_tensor->format(), 0, 0, &data_vec, &quant_vec,
+                                                  &quant_clusters, schema_tensor->name()->c_str(),
+                                                  schema_tensor->enableHuffmanCode(),
+                                                  mindspore::schema::WeightQuantCompressType_NONE, &external_data_vec);
+  fbb.Finish(tensor_offset);
+
+  auto buf = fbb.GetBufferPointer();
+  if (buf == nullptr) {
+    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
+    fbb.Clear();
+    return nullptr;
+  }
+  size_t byte_num = fbb.GetSize();
+  auto tensor_buf = reinterpret_cast<char *>(malloc(byte_num));
+  if (tensor_buf == nullptr) {
+    MS_LOG(ERROR) << "malloc primitive_buf_ failed";
+    fbb.Clear();
+    return nullptr;
+  }
+  memcpy(tensor_buf, buf, fbb.GetSize());
+  auto tensor = flatbuffers::GetRoot<schema::Tensor>(tensor_buf);
+  fbb.Clear();
+  return const_cast<schema::Tensor *>(tensor);
+}
+
+int NNRTDelegate::DequantNodeInputs(LiteGraph::Node *node) {
+  auto in_size = node->input_indices_.size();
+  int ret = RET_OK;
+  for (size_t i = 0; i < in_size; i++) {
+    auto tensor_index = node->input_indices_[i];
+    auto *src_tensor = lite_graph_->all_tensors_[tensor_index];
+    auto input = dequant_src_tensors_->at(tensor_index);
+    if (!input->IsConst() || !(src_tensor->dataType() == kNumberTypeInt8 ||
+        src_tensor->dataType() == kNumberTypeInt16 || src_tensor->dataType() == kNumberTypeInt32)) {
+      continue;
+    }
+    auto dst_tensor = TensorToSchemaTensor(input, src_tensor);
+    if (dst_tensor != nullptr) {
+      dequant_schema_tensors_.emplace(tensor_index, dst_tensor);
+      replaced_schema_tensors_.emplace_back(src_tensor);
+    } else {
+      MS_LOG(ERROR) << "create dequant schema tensor failed, node: " << node->name_ << ", tensor_index: "
+                    << tensor_index;
+      ret = RET_ERROR;
+      break;
+    }
+  }
+  return ret;
+}
+
+Status NNRTDelegate::DequantLiteGraph(LiteGraph *lite_graph) {
+  for (auto node_index : lite_graph->sub_graphs_[0]->node_indices_) {
+    auto node = lite_graph->all_nodes_[node_index];
+
+    if (node->quant_type_ != static_cast<int>(schema::QuantType_QUANT_WEIGHT)) {
+      continue;
+    }
+    auto ret = DequantNodeInputs(node);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Dequant node failed: " << ret << ", node_name: " << node->name_;
+      for (auto iter : dequant_schema_tensors_) {
+        delete iter.second;
+        iter.second = nullptr;
+      }
+      return kLiteNotSupport;
+    }
+    node->quant_type_ = schema::QuantType_QUANT_NONE;
+  }
+  for (auto iter : dequant_schema_tensors_) {
+    lite_graph_->all_tensors_[iter.first] = iter.second;
+  }
+  return kSuccess;
+}
+
 void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) {
   Status ret;
   for (auto node : lite_graph.all_nodes_) {
@@ -863,6 +921,10 @@ NNRTDelegate::~NNRTDelegate() {
   if (lite_graph_ != nullptr) {
     MS_LOG(ERROR) << "Delete NNRTDelegate.";
   }
+  for (auto iter : dequant_schema_tensors_) {
+    delete iter.second;
+    iter.second = nullptr;
+  }
 }
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
index 4cf357d6..778553ef 100644
--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
@@ -50,6 +50,9 @@ class NNRTDelegate : public Delegate {
   void SetMetaGraph(const void *meta_graph) {
     meta_graph_ = meta_graph;
   }
+  void SetDequantTensors(std::vector<Tensor *> *src_tensors) {
+    dequant_src_tensors_ = src_tensors;
+  }
   static std::vector<NNRTOpRange> GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model,
                                                         const std::vector<bool> &op_supports);

@@ -73,14 +76,14 @@ class NNRTDelegate : public Delegate {
   Status PrepareOutputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor);
   Status InitNNCompilation(OH_NNCompilation *nn_compilation) const;
   static OH_NN_DataType CastToNNRTDataType(mindspore::DataType data_type);
-  static OH_NN_Format CastToNNRTFormat(Format format);
   bool IsCustomModel() const;
+  Status DequantLiteGraph(LiteGraph *lite_graph);
+  int DequantNodeInputs(LiteGraph::Node *node);
+  schema::Tensor *TensorToSchemaTensor(Tensor *lite_tensor, schema::Tensor *schema_tensor);

 #ifdef SUPPORT_NNRT_METAGRAPH
   bool IsKirinNPU() const;
   Status BuildKirinNPUModel(DelegateModel<schema::Primitive> *model);
-  Status SetKirinModelInputsAndOutputs(OH_NNModel *nn_model);
-  std::vector<OH_NN_TensorInfo> CreateNNTensorInfos(const std::vector<uint32_t> &indices) const;
   Status CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model);
 #endif

@@ -90,6 +93,9 @@ class NNRTDelegate : public Delegate {
   std::string cache_path_ = "";
   uint32_t cache_version_ = 0;
   std::vector<OH_NNExecutor *> nn_executor_list_;
+  std::vector<Tensor *> *dequant_src_tensors_;
+  std::map<uint32_t, schema::Tensor *> dequant_schema_tensors_;
+  std::vector<schema::Tensor *> replaced_schema_tensors_;
 };
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/litert/scheduler.cc b/mindspore/lite/src/litert/scheduler.cc
index 96efd972..d6749471 100644
--- a/mindspore/lite/src/litert/scheduler.cc
+++ b/mindspore/lite/src/litert/scheduler.cc
@@ -514,6 +514,7 @@ int Scheduler::ReplaceDelegateKernels(std::vector<kernel::KernelExec *> *dst_ker
     void *meta_graph = reinterpret_cast<void *>(
       const_cast<mindspore::schema::MetaGraph *>(mindspore::schema::GetMetaGraph(this->src_model_->buf)));
     delegate->SetMetaGraph(meta_graph);
+    delegate->SetDequantTensors(this->src_tensors_);
   }
 #endif

--
2.17.1
