1From aa38d5a95960e60f6a90a1ffa2958a6ebcee2b4e Mon Sep 17 00:00:00 2001 2From: chengfeng27 <chengfeng27@huawei.com> 3Date: Thu, 18 Apr 2024 09:39:33 +0800 4Subject: [PATCH] nnrt litegraph dequant 5 6--- 7 mindspore/lite/mindir/include/mindir_tensor.h | 6 +- 8 mindspore/lite/mindir/include/mindir_types.h | 28 ++- 9 mindspore/lite/mindir/inner_headers/utils.h | 2 +- 10 mindspore/lite/mindir/src/mindir.cc | 93 ++++++++++ 11 mindspore/lite/mindir/src/mindir_tensor.cc | 14 +- 12 mindspore/lite/mindir/src/utils.cc | 27 +-- 13 .../src/litert/delegate/nnrt/nnrt_delegate.cc | 166 ++++++++++++------ 14 .../src/litert/delegate/nnrt/nnrt_delegate.h | 12 +- 15 mindspore/lite/src/litert/scheduler.cc | 1 + 16 9 files changed, 270 insertions(+), 79 deletions(-) 17 18diff --git a/mindspore/lite/mindir/include/mindir_tensor.h b/mindspore/lite/mindir/include/mindir_tensor.h 19index c1ac89bf..43c1478c 100644 20--- a/mindspore/lite/mindir/include/mindir_tensor.h 21+++ b/mindspore/lite/mindir/include/mindir_tensor.h 22@@ -8,9 +8,9 @@ namespace lite { 23 24 // ********** Tensor ********** 25 TensorPtr MindIR_Tensor_Create(); 26-TensorPtr MindIR_Tensor_Create(const std::string &name, DataType data_type, const std::vector<int32_t> &dims, 27- Format format, const std::vector<uint8_t> &data, 28- const std::vector<QuantParam> &quant_params); 29+TensorPtr MindIR_Tensor_Create(const char *name, DataType data_type, const int32_t *dims, uint32_t dims_size, 30+ Format format, const uint8_t *data, uint32_t data_size, 31+ const QuantParam *quant_params, uint32_t quant_params_size); 32 std::string MindIR_Tensor_GetName(ConstTensorPtr tensor); 33 void MindIR_Tensor_SetName(TensorPtr *tensor, const std::string &name); 34 DataType MindIR_Tensor_GetDataType(ConstTensorPtr tensor); 35diff --git a/mindspore/lite/mindir/include/mindir_types.h b/mindspore/lite/mindir/include/mindir_types.h 36index 5744441a..196995fa 100644 37--- a/mindspore/lite/mindir/include/mindir_types.h 38+++ b/mindspore/lite/mindir/include/mindir_types.h 39@@ -44,11 +44,35 @@ enum DataType : int8_t { 40 enum Format : int8_t { 41 FORMAT_NCHW = 0, 42 FORMAT_NHWC = 1, 43+ FORMAT_NHWC4 = 2, 44+ FORMAT_HWKC = 3, 45+ FORMAT_HWCK = 4, 46+ FORMAT_KCHW = 5, 47+ FORMAT_CKHW = 6, 48+ FORMAT_KHWC = 7, 49+ FORMAT_CHWK = 8, 50+ FORMAT_HW = 9, 51+ FORMAT_HW4 = 10, 52+ FORMAT_NC = 11, 53+ FORMAT_NC4 = 12, 54+ FORMAT_NC4HW4 = 13, 55+ FORMAT_NUM_OF_FORMAT = 14, 56+ FORMAT_NCDHW = 15, 57+ FORMAT_NWC = 16, 58+ FORMAT_NCW = 17, 59+ FORMAT_NC8HW8 = 18, 60+ FORMAT_MIN = FORMAT_NCHW, 61+ FORMAT_MAX = FORMAT_NC8HW8 62 }; 63 64 enum QuantType : int8_t { 65- QUANT_TYPE_NONE, 66- QUANT_TYPE_ALL, 67+ QUANT_TYPE_NONE = 0, 68+ QUANT_TYPE_AWARETRAINING = 1, 69+ QUANT_TYPE_WEIGHTQUANT = 2, 70+ QUANT_TYPE_POSTTRAINING = 3, 71+ QUANT_TYPE_WEIGHT = 4, 72+ QUANT_TYPE_ALL = 5, 73+ QUANT_TYPE_DYNAMIC = 6 74 }; 75 76 enum NodeType : uint32_t { 77diff --git a/mindspore/lite/mindir/inner_headers/utils.h b/mindspore/lite/mindir/inner_headers/utils.h 78index 0e6eb35d..0d150f80 100644 79--- a/mindspore/lite/mindir/inner_headers/utils.h 80+++ b/mindspore/lite/mindir/inner_headers/utils.h 81@@ -17,7 +17,7 @@ flatbuffers::Offset<schema::Vec2D> CreateVec2D(flatbuffers::FlatBufferBuilder &f 82 mindspore::schema::PrimitiveType MindIR_GetPrimitiveType(PrimitivePtr prim); 83 84 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ConvertQuantParams( 85- flatbuffers::FlatBufferBuilder &fbb, const std::vector<QuantParam> &quant_params); 86+ flatbuffers::FlatBufferBuilder &fbb, const QuantParam *quant_params, uint32_t quant_params_size); 87 88 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ConvertQuantParams( 89 flatbuffers::FlatBufferBuilder &fbb, 90diff --git a/mindspore/lite/mindir/src/mindir.cc b/mindspore/lite/mindir/src/mindir.cc 91index 7041498a..a1f86671 100644 92--- a/mindspore/lite/mindir/src/mindir.cc 93+++ b/mindspore/lite/mindir/src/mindir.cc 94@@ -398,6 +398,9 @@ std::vector<int64_t> MindIR_AvgPoolFusion_GetKernelSize(ConstPrimitivePtr primit 95 if (prim != nullptr && value != nullptr) { 96 std::vector<int64_t> result; 97 auto src = value->kernel_size(); 98+ if (src == nullptr) { 99+ return {}; 100+ } 101 result.resize(src->size()); 102 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 103 return result; 104@@ -437,6 +440,9 @@ std::vector<int64_t> MindIR_AvgPoolFusion_GetStrides(ConstPrimitivePtr primitive 105 if (prim != nullptr && value != nullptr) { 106 std::vector<int64_t> result; 107 auto src = value->strides(); 108+ if (src == nullptr) { 109+ return {}; 110+ } 111 result.resize(src->size()); 112 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 113 return result; 114@@ -476,6 +482,9 @@ std::vector<int64_t> MindIR_AvgPoolFusion_GetPad(ConstPrimitivePtr primitive) { 115 if (prim != nullptr && value != nullptr) { 116 std::vector<int64_t> result; 117 auto src = value->pad(); 118+ if (src == nullptr) { 119+ return {}; 120+ } 121 result.resize(src->size()); 122 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 123 return result; 124@@ -712,6 +721,9 @@ std::vector<int64_t> MindIR_BatchToSpaceND_GetBlockShape(ConstPrimitivePtr primi 125 if (prim != nullptr && value != nullptr) { 126 std::vector<int64_t> result; 127 auto src = value->block_shape(); 128+ if (src == nullptr) { 129+ return {}; 130+ } 131 result.resize(src->size()); 132 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 133 return result; 134@@ -747,6 +759,9 @@ std::vector<std::vector<int64_t>> MindIR_BatchToSpaceND_GetCrops(ConstPrimitiveP 135 if (prim != nullptr && value != nullptr) { 136 std::vector<std::vector<int64_t>> out; 137 auto src = value->crops(); 138+ if (src == nullptr) { 139+ return {}; 140+ } 141 for (auto sub_list : *src->data()) { 142 std::vector<int64_t> result_tmp; 143 result_tmp.resize(sub_list->data()->size()); 144@@ -871,6 +886,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetKernelSize(ConstPrimitivePtr primiti 145 if (prim != nullptr && value != nullptr) { 146 std::vector<int64_t> result; 147 auto src = value->kernel_size(); 148+ if (src == nullptr) { 149+ return {}; 150+ } 151 result.resize(src->size()); 152 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 153 return result; 154@@ -911,6 +929,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetStride(ConstPrimitivePtr primitive) 155 if (prim != nullptr && value != nullptr) { 156 std::vector<int64_t> result; 157 auto src = value->stride(); 158+ if (src == nullptr) { 159+ return {}; 160+ } 161 result.resize(src->size()); 162 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 163 return result; 164@@ -952,6 +973,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetDilation(ConstPrimitivePtr primitive 165 if (prim != nullptr && value != nullptr) { 166 std::vector<int64_t> result; 167 auto src = value->dilation(); 168+ if (src == nullptr) { 169+ return {}; 170+ } 171 result.resize(src->size()); 172 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 173 return result; 174@@ -1030,6 +1054,9 @@ std::vector<int64_t> MindIR_Conv2DFusion_GetPadList(ConstPrimitivePtr primitive) 175 if (prim != nullptr && value != nullptr) { 176 std::vector<int64_t> result; 177 auto src = value->pad_list(); 178+ if (src == nullptr) { 179+ return {}; 180+ } 181 result.resize(src->size()); 182 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 183 return result; 184@@ -1281,6 +1308,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetKernelSize(ConstPrimitivePt 185 if (prim != nullptr && value != nullptr) { 186 std::vector<int64_t> result; 187 auto src = value->kernel_size(); 188+ if (src == nullptr) { 189+ return {}; 190+ } 191 result.resize(src->size()); 192 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 193 return result; 194@@ -1322,6 +1352,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetStride(ConstPrimitivePtr pr 195 if (prim != nullptr && value != nullptr) { 196 std::vector<int64_t> result; 197 auto src = value->stride(); 198+ if (src == nullptr) { 199+ return {}; 200+ } 201 result.resize(src->size()); 202 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 203 return result; 204@@ -1364,6 +1397,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetDilation(ConstPrimitivePtr 205 if (prim != nullptr && value != nullptr) { 206 std::vector<int64_t> result; 207 auto src = value->dilation(); 208+ if (src == nullptr) { 209+ return {}; 210+ } 211 result.resize(src->size()); 212 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 213 return result; 214@@ -1444,6 +1480,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetPadList(ConstPrimitivePtr p 215 if (prim != nullptr && value != nullptr) { 216 std::vector<int64_t> result; 217 auto src = value->pad_list(); 218+ if (src == nullptr) { 219+ return {}; 220+ } 221 result.resize(src->size()); 222 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 223 return result; 224@@ -1640,6 +1679,9 @@ std::vector<int64_t> MindIR_Conv2dTransposeFusion_GetOutputPaddings(ConstPrimiti 225 if (prim != nullptr && value != nullptr) { 226 std::vector<int64_t> result; 227 auto src = value->output_paddings(); 228+ if (src == nullptr) { 229+ return {}; 230+ } 231 result.resize(src->size()); 232 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 233 return result; 234@@ -2273,6 +2315,9 @@ std::vector<int64_t> MindIR_MaxPoolFusion_GetKernelSize(ConstPrimitivePtr primit 235 if (prim != nullptr && value != nullptr) { 236 std::vector<int64_t> result; 237 auto src = value->kernel_size(); 238+ if (src == nullptr) { 239+ return {}; 240+ } 241 result.resize(src->size()); 242 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 243 return result; 244@@ -2312,6 +2357,9 @@ std::vector<int64_t> MindIR_MaxPoolFusion_GetStrides(ConstPrimitivePtr primitive 245 if (prim != nullptr && value != nullptr) { 246 std::vector<int64_t> result; 247 auto src = value->strides(); 248+ if (src == nullptr) { 249+ return {}; 250+ } 251 result.resize(src->size()); 252 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 253 return result; 254@@ -2351,6 +2399,9 @@ std::vector<int64_t> MindIR_MaxPoolFusion_GetPad(ConstPrimitivePtr primitive) { 255 if (prim != nullptr && value != nullptr) { 256 std::vector<int64_t> result; 257 auto src = value->pad(); 258+ if (src == nullptr) { 259+ return {}; 260+ } 261 result.resize(src->size()); 262 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 263 return result; 264@@ -2680,6 +2731,9 @@ std::vector<std::vector<int64_t>> MindIR_PadFusion_GetPaddings(ConstPrimitivePtr 265 if (prim != nullptr && value != nullptr) { 266 std::vector<std::vector<int64_t>> out; 267 auto src = value->paddings(); 268+ if (src == nullptr) { 269+ return {}; 270+ } 271 for (auto sub_list : *src->data()) { 272 std::vector<int64_t> result_tmp; 273 result_tmp.resize(sub_list->data()->size()); 274@@ -3601,6 +3655,9 @@ std::vector<int64_t> MindIR_SliceFusion_GetAxes(ConstPrimitivePtr primitive) { 275 if (prim != nullptr && value != nullptr) { 276 std::vector<int64_t> result; 277 auto src = value->axes(); 278+ if (src == nullptr) { 279+ return {}; 280+ } 281 result.resize(src->size()); 282 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 283 return result; 284@@ -3646,6 +3703,9 @@ std::vector<int64_t> MindIR_Softmax_GetAxis(ConstPrimitivePtr primitive) { 285 if (prim != nullptr && value != nullptr) { 286 std::vector<int64_t> result; 287 auto src = value->axis(); 288+ if (src == nullptr) { 289+ return {}; 290+ } 291 result.resize(src->size()); 292 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 293 return result; 294@@ -3694,6 +3754,9 @@ std::vector<int64_t> MindIR_SpaceToBatchND_GetBlockShape(ConstPrimitivePtr primi 295 if (prim != nullptr && value != nullptr) { 296 std::vector<int64_t> result; 297 auto src = value->block_shape(); 298+ if (src == nullptr) { 299+ return {}; 300+ } 301 result.resize(src->size()); 302 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 303 return result; 304@@ -3729,6 +3792,9 @@ std::vector<std::vector<int64_t>> MindIR_SpaceToBatchND_GetPaddings(ConstPrimiti 305 if (prim != nullptr && value != nullptr) { 306 std::vector<std::vector<int64_t>> out; 307 auto src = value->paddings(); 308+ if (src == nullptr) { 309+ return {}; 310+ } 311 for (auto sub_list : *src->data()) { 312 std::vector<int64_t> result_tmp; 313 result_tmp.resize(sub_list->data()->size()); 314@@ -3812,6 +3878,9 @@ std::vector<int64_t> MindIR_Split_GetSizeSplits(ConstPrimitivePtr primitive) { 315 if (prim != nullptr && value != nullptr) { 316 std::vector<int64_t> result; 317 auto src = value->size_splits(); 318+ if (src == nullptr) { 319+ return {}; 320+ } 321 result.resize(src->size()); 322 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 323 return result; 324@@ -3912,6 +3981,9 @@ std::vector<int64_t> MindIR_Squeeze_GetAxis(ConstPrimitivePtr primitive) { 325 if (prim != nullptr && value != nullptr) { 326 std::vector<int64_t> result; 327 auto src = value->axis(); 328+ if (src == nullptr) { 329+ return {}; 330+ } 331 result.resize(src->size()); 332 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 333 return result; 334@@ -4212,6 +4284,9 @@ std::vector<int64_t> MindIR_TileFusion_GetDims(ConstPrimitivePtr primitive) { 335 if (prim != nullptr && value != nullptr) { 336 std::vector<int64_t> result; 337 auto src = value->dims(); 338+ if (src == nullptr) { 339+ return {}; 340+ } 341 result.resize(src->size()); 342 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 343 return result; 344@@ -4342,6 +4417,9 @@ std::vector<int64_t> MindIR_Unsqueeze_GetAxis(ConstPrimitivePtr primitive) { 345 if (prim != nullptr && value != nullptr) { 346 std::vector<int64_t> result; 347 auto src = value->axis(); 348+ if (src == nullptr) { 349+ return {}; 350+ } 351 result.resize(src->size()); 352 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 353 return result; 354@@ -4399,6 +4477,9 @@ std::vector<int64_t> MindIR_BroadcastTo_GetShape(ConstPrimitivePtr primitive) { 355 if (prim != nullptr && value != nullptr) { 356 std::vector<int64_t> result; 357 auto src = value->shape(); 358+ if (src == nullptr) { 359+ return {}; 360+ } 361 result.resize(src->size()); 362 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 363 return result; 364@@ -4477,6 +4558,9 @@ std::vector<float> MindIR_ConstantOfShape_GetValue(ConstPrimitivePtr primitive) 365 if (prim != nullptr && value_ != nullptr) { 366 std::vector<float> result; 367 auto src = value_->value(); 368+ if (src == nullptr) { 369+ return {}; 370+ } 371 result.resize(src->size()); 372 std::transform(src->begin(), src->end(), result.begin(), [](float item) { return item; }); 373 return result; 374@@ -5889,6 +5973,9 @@ std::vector<int64_t> MindIR_L2NormalizeFusion_GetAxis(ConstPrimitivePtr primitiv 375 if (prim != nullptr && value != nullptr) { 376 std::vector<int64_t> result; 377 auto src = value->axis(); 378+ if (src == nullptr) { 379+ return {}; 380+ } 381 result.resize(src->size()); 382 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 383 return result; 384@@ -6238,6 +6325,9 @@ std::vector<int64_t> MindIR_Crop_GetOffsets(ConstPrimitivePtr primitive) { 385 if (prim != nullptr && value != nullptr) { 386 std::vector<int64_t> result; 387 auto src = value->offsets(); 388+ if (src == nullptr) { 389+ return {}; 390+ } 391 result.resize(src->size()); 392 std::transform(src->begin(), src->end(), result.begin(), [](int64_t item) { return item; }); 393 return result; 394@@ -6348,6 +6438,9 @@ std::vector<float> MindIR_DetectionPostProcess_GetScale(ConstPrimitivePtr primit 395 if (prim != nullptr && value != nullptr) { 396 std::vector<float> result; 397 auto src = value->scale(); 398+ if (src == nullptr) { 399+ return {}; 400+ } 401 result.resize(src->size()); 402 std::transform(src->begin(), src->end(), result.begin(), [](float item) { return item; }); 403 return result; 404diff --git a/mindspore/lite/mindir/src/mindir_tensor.cc b/mindspore/lite/mindir/src/mindir_tensor.cc 405index 9575f8c2..8888e2c9 100644 406--- a/mindspore/lite/mindir/src/mindir_tensor.cc 407+++ b/mindspore/lite/mindir/src/mindir_tensor.cc 408@@ -36,15 +36,15 @@ TensorPtr MindIR_Tensor_Create() { 409 return ret_value; 410 } 411 412-TensorPtr MindIR_Tensor_Create(const std::string &name, DataType data_type, const std::vector<int32_t> &dims, 413- Format format, const std::vector<uint8_t> &data, 414- const std::vector<QuantParam> &quant_params) { 415+TensorPtr MindIR_Tensor_Create(const char *name, DataType data_type, const int32_t *dims, uint32_t dims_size, 416+ Format format, const uint8_t *data, uint32_t data_size, 417+ const QuantParam *quant_params, uint32_t quant_params_size) { 418 flatbuffers::FlatBufferBuilder fbb; 419 420 auto ops_offset = 421- schema::CreateTensor(fbb, 0, data_type, fbb.CreateVector(dims.data(), dims.size()), 422- static_cast<schema::Format>(format), 0, 0, fbb.CreateVector(data.data(), data.size()), 423- ConvertQuantParams(fbb, quant_params), 0, fbb.CreateString(name.c_str(), name.size())); 424+ schema::CreateTensor(fbb, 0, data_type, fbb.CreateVector(dims, dims_size), 425+ static_cast<schema::Format>(format), 0, 0, fbb.CreateVector(data, data_size), 426+ ConvertQuantParams(fbb, quant_params, quant_params_size), 0, fbb.CreateString(name, strlen(name))); 427 fbb.Finish(ops_offset); 428 auto new_addr = MindIRMemoryManager::GetInstance()->CreateTensorFromBuilder(fbb, nullptr); 429 auto ret_value = flatbuffers::GetMutableRoot<schema::Tensor>(new_addr); 430@@ -332,7 +332,7 @@ void MindIR_Tensor_SetQuantParams(TensorPtr *tensor, const std::vector<QuantPara 431 } 432 auto ops_offset = 433 schema::CreateTensor(fbb, 0, value->dataType(), dims, static_cast<schema::Format>(value->format()), 0, 0, data, 434- ConvertQuantParams(fbb, quant_params), 0, name); 435+ ConvertQuantParams(fbb, quant_params.data(), quant_params.size()), 0, name); 436 fbb.Finish(ops_offset); 437 auto new_addr = MindIRMemoryManager::GetInstance()->CreateTensorFromBuilder(fbb, value); 438 auto ret_value = flatbuffers::GetMutableRoot<schema::Primitive>(new_addr); 439diff --git a/mindspore/lite/mindir/src/utils.cc b/mindspore/lite/mindir/src/utils.cc 440index b044f414..870802a9 100644 441--- a/mindspore/lite/mindir/src/utils.cc 442+++ b/mindspore/lite/mindir/src/utils.cc 443@@ -63,21 +63,24 @@ flatbuffers::Offset<schema::Vec2D> CreateVec2D(flatbuffers::FlatBufferBuilder &f 444 } 445 flatbuffers::Offset<schema::Vec2D> CreateVec2D(flatbuffers::FlatBufferBuilder &fbb, 446 const mindspore::schema::Vec2D *data) { 447- auto data_inner = data->data(); 448 std::vector<flatbuffers::Offset<schema::Vec>> vet2d; 449- vet2d.reserve(data_inner->size()); 450- for (const auto data_one : *data_inner) { 451- vet2d.emplace_back(schema::CreateVec(fbb, fbb.CreateVector(data_one->data()->data(), data_one->data()->size()))); 452+ if (data != nullptr) { 453+ auto data_inner = data->data(); 454+ vet2d.reserve(data_inner->size()); 455+ for (const auto data_one : *data_inner) { 456+ vet2d.emplace_back(schema::CreateVec(fbb, fbb.CreateVector(data_one->data()->data(), data_one->data()->size()))); 457+ } 458 } 459 flatbuffers::Offset<schema::Vec2D> v2d = schema::CreateVec2D(fbb, fbb.CreateVector(vet2d)); 460 return v2d; 461 } 462 463 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ConvertQuantParams( 464- flatbuffers::FlatBufferBuilder &fbb, const std::vector<QuantParam> &quant_params) { 465+ flatbuffers::FlatBufferBuilder &fbb, const QuantParam *quant_params, uint32_t quant_params_size) { 466 std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> tmp_vec; 467- tmp_vec.reserve(quant_params.size()); 468- for (auto q_param : quant_params) { 469+ tmp_vec.reserve(quant_params_size); 470+ for (uint32_t i = 0; i < quant_params_size; i++) { 471+ QuantParam q_param = quant_params[i]; 472 tmp_vec.emplace_back(schema::CreateQuantParam(fbb, q_param.scale, q_param.zeroPoint, 0, 0, true, q_param.numBits)); 473 } 474 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ret_quant_param = 475@@ -89,10 +92,12 @@ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>> 476 flatbuffers::FlatBufferBuilder &fbb, 477 const flatbuffers::Vector<flatbuffers::Offset<mindspore::schema::QuantParam>> *quant_params) { 478 std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> tmp_vec; 479- tmp_vec.reserve(quant_params->size()); 480- for (auto q_param : *quant_params) { 481- tmp_vec.emplace_back( 482- schema::CreateQuantParam(fbb, q_param->scale(), q_param->zeroPoint(), 0, 0, true, q_param->numBits())); 483+ if (quant_params != nullptr && quant_params->size() != 0) { 484+ tmp_vec.reserve(quant_params->size()); 485+ for (auto q_param : *quant_params) { 486+ tmp_vec.emplace_back( 487+ schema::CreateQuantParam(fbb, q_param->scale(), q_param->zeroPoint(), 0, 0, true, q_param->numBits())); 488+ } 489 } 490 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>>> ret_quant_param = 491 fbb.CreateVector(tmp_vec.data(), tmp_vec.size()); 492diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 493index ca195af4..d8450141 100644 494--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 495+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 496@@ -52,6 +52,12 @@ void NNRTDelegate::InitCachePath() { 497 } 498 499 Status NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) { 500+ // dequant litegraph 501+ auto ret_dequant = DequantLiteGraph(lite_graph_); 502+ if (ret_dequant != kSuccess) { 503+ MS_LOG(ERROR) << "Dequant litegraph failed."; 504+ return kLiteError; 505+ } 506 #ifdef SUPPORT_NNRT_METAGRAPH 507 if (IsKirinNPU()) { 508 MS_LOG(DEBUG) << "Choose to build nnrt model with Metagraph"; 509@@ -121,22 +127,11 @@ Status NNRTDelegate::BuildKirinNPUModel(DelegateModel<schema::Primitive> *model) 510 MS_LOG_DEBUG << "set extension, item name: " << dst_extension.name << ", value size: " << dst_extension.valueSize; 511 } 512 513- if (IsCustomModel()) { 514- auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); 515- if (ret != OH_NN_SUCCESS) { 516- MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 517- OH_NNModel_Destroy(&nn_model); 518- return kLiteError; 519- } 520- } else { 521- SetKirinModelInputsAndOutputs(nn_model); 522- auto ret = OH_NNModel_BuildFromMetaGraph(nn_model, meta_graph_, extensions.data(), extensions.size()); 523- FreeLiteGraph(&lite_graph_); 524- if (ret != OH_NN_SUCCESS) { 525- MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 526- OH_NNModel_Destroy(&nn_model); 527- return kLiteError; 528- } 529+ auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, extensions.data(), extensions.size()); 530+ if (ret != OH_NN_SUCCESS) { 531+ MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 532+ OH_NNModel_Destroy(&nn_model); 533+ return kLiteError; 534 } 535 536 auto ret2 = CreateFullModelKernel(model, nn_model); 537@@ -147,36 +142,6 @@ Status NNRTDelegate::BuildKirinNPUModel(DelegateModel<schema::Primitive> *model) 538 return kSuccess; 539 } 540 541-std::vector<OH_NN_TensorInfo> NNRTDelegate::CreateNNTensorInfos(const std::vector<uint32_t> &indices) const { 542- std::vector<OH_NN_TensorInfo> nn_tensor_infos; 543- for (auto index: indices) { 544- auto tensor = lite_graph_->all_tensors_[index]; 545- auto shape = tensor->dims(); 546- auto data_type = tensor->dataType(); 547- auto name = tensor->name(); 548- auto format = tensor->format(); 549- 550- OH_NN_TensorInfo info; 551- info.dataType = CastToNNRTDataType(static_cast<mindspore::DataType>(data_type)); 552- info.dimensions = shape->data(); 553- info.dimensionCount = shape->size(); 554- strcpy(info.name, name->c_str()); 555- info.format = CastToNNRTFormat(static_cast<Format>(format)); 556- nn_tensor_infos.push_back(info); 557- } 558- return nn_tensor_infos; 559-} 560- 561-Status NNRTDelegate::SetKirinModelInputsAndOutputs(OH_NNModel *nn_model) { 562- std::vector<OH_NN_TensorInfo> inputInfos; 563- std::vector<OH_NN_TensorInfo> outputInfos; 564- auto input_infos = CreateNNTensorInfos(lite_graph_->input_indices_); 565- auto output_infos = CreateNNTensorInfos(lite_graph_->output_indices_); 566- OH_NNModel_SetInputsAndOutputsInfo(nn_model, input_infos.data(), input_infos.size(), output_infos.data(), 567- output_infos.size()); 568- return kSuccess; 569-} 570- 571 Status NNRTDelegate::CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model) { 572 OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model); 573 if (nn_compilation == nullptr) { 574@@ -277,7 +242,7 @@ OH_NNModel *NNRTDelegate::CreateFullNNModel() { 575 return nullptr; 576 } 577 578- auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); 579+ auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, nullptr, 0); 580 if (ret != OH_NN_SUCCESS) { 581 MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 582 OH_NNModel_Destroy(&nn_model); 583@@ -531,7 +496,7 @@ Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive> 584 auto sub_lite_graph = sub_lite_graphs[i]; 585 586 OH_NNModel *nn_model = OH_NNModel_Construct(); 587- auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph); 588+ auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph, nullptr, 0); 589 if (ret != OH_NN_SUCCESS) { 590 MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 591 OH_NNModel_Destroy(&nn_model); 592@@ -735,10 +700,6 @@ OH_NN_DataType NNRTDelegate::CastToNNRTDataType(DataType data_type) { 593 return iter->second; 594 } 595 596-OH_NN_Format NNRTDelegate::CastToNNRTFormat(Format format) { 597- return OH_NN_FORMAT_NHWC; 598-} 599- 600 Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model, 601 OH_NNExecutor *oh_nn_executor) { 602 auto output_tensors = model->outputs(); 603@@ -754,6 +715,103 @@ Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model, 604 return kSuccess; 605 } 606 607+schema::Tensor *NNRTDelegate::TensorToSchemaTensor(Tensor *lite_tensor, schema::Tensor *schema_tensor) { 608+ flatbuffers::FlatBufferBuilder fbb(1024); 609+ auto shape = lite_tensor->shape(); 610+ std::vector<int32_t> dim_vec(shape.begin(), shape.end()); 611+ 612+ auto quant_params = lite_tensor->quant_params(); 613+ std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> quant_vec; 614+ quant_vec.reserve(quant_params.size()); 615+ for (auto q_param : quant_params) { 616+ quant_vec.emplace_back(schema::CreateQuantParam(fbb, q_param.scale, q_param.zeroPoint, 0, 0, true, q_param.bitNum)); 617+ } 618+ auto quant_clusters = lite_tensor->quant_clusters(); 619+ 620+ auto external_data = schema_tensor->externalData(); 621+ std::vector<flatbuffers::Offset<mindspore::schema::ExternalData>> external_data_vec; 622+ if (external_data != nullptr) { 623+ for (auto ed : *external_data) { 624+ external_data_vec.emplace_back(schema::CreateExternalDataDirect(fbb, ed->checkSum()->c_str(), ed->location()->c_str(), 0, ed->length())); 625+ } 626+ } 627+ uint8_t *data_src = reinterpret_cast<uint8_t *>(lite_tensor->data()); 628+ std::vector<uint8_t> data_vec(data_src, data_src + lite_tensor->Size()); 629+ auto tensor_offset = schema::CreateTensorDirect(fbb, schema_tensor->nodeType(), lite_tensor->data_type(), &dim_vec, 630+ schema_tensor->format(), 0, 0, &data_vec, &quant_vec, 631+ &quant_clusters, schema_tensor->name()->c_str(), 632+ schema_tensor->enableHuffmanCode(), 633+ mindspore::schema::WeightQuantCompressType_NONE, &external_data_vec); 634+ fbb.Finish(tensor_offset); 635+ 636+ auto buf = fbb.GetBufferPointer(); 637+ if (buf == nullptr) { 638+ MS_LOG(ERROR) << "GetBufferPointer return nullptr"; 639+ fbb.Clear(); 640+ return nullptr; 641+ } 642+ size_t byte_num = fbb.GetSize(); 643+ auto tensor_buf = reinterpret_cast<char *>(malloc(byte_num)); 644+ if (tensor_buf == nullptr) { 645+ MS_LOG(ERROR) << "malloc primitive_buf_ failed"; 646+ fbb.Clear(); 647+ return nullptr; 648+ } 649+ memcpy(tensor_buf, buf, fbb.GetSize()); 650+ auto tensor = flatbuffers::GetRoot<schema::Tensor>(tensor_buf); 651+ fbb.Clear(); 652+ return const_cast<schema::Tensor *>(tensor); 653+} 654+ 655+int NNRTDelegate::DequantNodeInputs(LiteGraph::Node *node) { 656+ auto in_size = node->input_indices_.size(); 657+ int ret = RET_OK; 658+ for (size_t i = 0; i < in_size; i++) { 659+ auto tensor_index = node->input_indices_[i]; 660+ auto *src_tensor = lite_graph_->all_tensors_[tensor_index]; 661+ auto input = dequant_src_tensors_->at(tensor_index); 662+ if (!input->IsConst() || !(src_tensor->dataType() == kNumberTypeInt8 || 663+ src_tensor->dataType() == kNumberTypeInt16 || src_tensor->dataType() == kNumberTypeInt32)) { 664+ continue; 665+ } 666+ auto dst_tensor = TensorToSchemaTensor(input, src_tensor); 667+ if (dst_tensor != nullptr) { 668+ dequant_schema_tensors_.emplace(tensor_index, dst_tensor); 669+ replaced_schema_tensors_.emplace_back(src_tensor); 670+ } else { 671+ MS_LOG(ERROR) << "create dequant schema tensor failed, node: " << node->name_ << ", tensor_index: " 672+ << tensor_index; 673+ ret = RET_ERROR; 674+ break; 675+ } 676+ } 677+ return ret; 678+} 679+ 680+Status NNRTDelegate::DequantLiteGraph(LiteGraph *lite_graph) { 681+ for (auto node_index : lite_graph->sub_graphs_[0]->node_indices_) { 682+ auto node = lite_graph->all_nodes_[node_index]; 683+ 684+ if (node->quant_type_ != static_cast<int>(schema::QuantType_QUANT_WEIGHT)) { 685+ continue; 686+ } 687+ auto ret = DequantNodeInputs(node); 688+ if (ret != RET_OK) { 689+ MS_LOG(ERROR) << "Dequant node failed: " << ret << ", node_name: " << node->name_; 690+ for (auto iter : dequant_schema_tensors_) { 691+ delete iter.second; 692+ iter.second = nullptr; 693+ } 694+ return kLiteNotSupport; 695+ } 696+ node->quant_type_ = schema::QuantType_QUANT_NONE; 697+ } 698+ for (auto iter : dequant_schema_tensors_) { 699+ lite_graph_->all_tensors_[iter.first] = iter.second; 700+ } 701+ return kSuccess; 702+} 703+ 704 void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) { 705 Status ret; 706 for (auto node : lite_graph.all_nodes_) { 707@@ -863,6 +921,10 @@ NNRTDelegate::~NNRTDelegate() { 708 if (lite_graph_ != nullptr) { 709 MS_LOG(ERROR) << "Delete NNRTDelegate."; 710 } 711+ for (auto iter : dequant_schema_tensors_) { 712+ delete iter.second; 713+ iter.second = nullptr; 714+ } 715 } 716 } // namespace lite 717 } // namespace mindspore 718diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 719index 4cf357d6..778553ef 100644 720--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 721+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 722@@ -50,6 +50,9 @@ class NNRTDelegate : public Delegate { 723 void SetMetaGraph(const void *meta_graph) { 724 meta_graph_ = meta_graph; 725 } 726+ void SetDequantTensors(std::vector<Tensor *> *src_tensors) { 727+ dequant_src_tensors_ = src_tensors; 728+ } 729 static std::vector<NNRTOpRange> GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model, 730 const std::vector<bool> &op_supports); 731 732@@ -73,14 +76,14 @@ class NNRTDelegate : public Delegate { 733 Status PrepareOutputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor); 734 Status InitNNCompilation(OH_NNCompilation *nn_compilation) const; 735 static OH_NN_DataType CastToNNRTDataType(mindspore::DataType data_type); 736- static OH_NN_Format CastToNNRTFormat(Format format); 737 bool IsCustomModel() const; 738+ Status DequantLiteGraph(LiteGraph *lite_graph); 739+ int DequantNodeInputs(LiteGraph::Node *node); 740+ schema::Tensor *TensorToSchemaTensor(Tensor *lite_tensor, schema::Tensor *schema_tensor); 741 742 #ifdef SUPPORT_NNRT_METAGRAPH 743 bool IsKirinNPU() const; 744 Status BuildKirinNPUModel(DelegateModel<schema::Primitive> *model); 745- Status SetKirinModelInputsAndOutputs(OH_NNModel *nn_model); 746- std::vector<OH_NN_TensorInfo> CreateNNTensorInfos(const std::vector<uint32_t> &indices) const; 747 Status CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model); 748 #endif 749 750@@ -90,6 +93,9 @@ class NNRTDelegate : public Delegate { 751 std::string cache_path_ = ""; 752 uint32_t cache_version_ = 0; 753 std::vector<OH_NNExecutor *> nn_executor_list_; 754+ std::vector<Tensor *> *dequant_src_tensors_; 755+ std::map<uint32_t, schema::Tensor *> dequant_schema_tensors_; 756+ std::vector<schema::Tensor *> replaced_schema_tensors_; 757 }; 758 } // namespace lite 759 } // namespace mindspore 760diff --git a/mindspore/lite/src/litert/scheduler.cc b/mindspore/lite/src/litert/scheduler.cc 761index 96efd972..d6749471 100644 762--- a/mindspore/lite/src/litert/scheduler.cc 763+++ b/mindspore/lite/src/litert/scheduler.cc 764@@ -514,6 +514,7 @@ int Scheduler::ReplaceDelegateKernels(std::vector<kernel::KernelExec *> *dst_ker 765 void *meta_graph = reinterpret_cast<void *>( 766 const_cast<mindspore::schema::MetaGraph *>(mindspore::schema::GetMetaGraph(this->src_model_->buf))); 767 delegate->SetMetaGraph(meta_graph); 768+ delegate->SetDequantTensors(this->src_tensors_); 769 } 770 #endif 771 772-- 7732.17.1 774 775