/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tools/benchmark/benchmark_base.h"
#include <cinttypes>
#include <algorithm>
#include <utility>
#include <regex>
#include <functional>
#include "schema/model_generated.h"
#include "src/common/common.h"
#include "src/tensor.h"
#ifdef ENABLE_ARM64
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <asm/unistd.h>
#include <unistd.h>
#endif
#ifdef SUPPORT_NNIE
#include "include/hi_common.h"
#include "include/hi_comm_vb.h"
#include "include/mpi_sys.h"
#include "include/mpi_vb.h"
#endif

namespace mindspore {
namespace lite {
constexpr int kThreadNumMin = 1;
constexpr int kParallelThreadNumMin = 1;
constexpr int kColumnLen = 4;
constexpr int kPrintColNum = 5;
constexpr int kPrintRowLenMax = 100;

constexpr float kInputDataFloatMin = 0.1f;
constexpr float kInputDataFloatMax = 1.0f;
constexpr double kInputDataDoubleMin = 0.1;
constexpr double kInputDataDoubleMax = 1.0;
constexpr int64_t kInputDataInt64Min = 0;
constexpr int64_t kInputDataInt64Max = 1;
constexpr int32_t kInputDataInt32Min = 0;
constexpr int32_t kInputDataInt32Max = 1;
constexpr int16_t kInputDataInt16Min = 0;
constexpr int16_t kInputDataInt16Max = 1;
constexpr int16_t kInputDataInt8Min = -127;
constexpr int16_t kInputDataInt8Max = 127;
constexpr int16_t kInputDataUint8Min = 0;
constexpr int16_t kInputDataUint8Max = 254;
#ifdef SUPPORT_NNIE
constexpr int kNNIEMaxPoolCnt = 2;
constexpr int kNNIEBlkSize = 768 * 576 * 2;
#endif

const std::unordered_map<int, std::string> kTypeIdMap{
  {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"},
  {kNumberTypeInt8, "Int8"},       {kNumberTypeInt16, "Int16"},   {kNumberTypeInt, "Int32"},
  {kNumberTypeInt32, "Int32"},     {kNumberTypeUInt8, "UInt8"},   {kNumberTypeUInt16, "UInt16"},
  {kNumberTypeUInt, "UInt32"},     {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"},
  {kNumberTypeBool, "Bool"},       {kObjectTypeTensorType, "Tensor"}};

const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap{
  {mindspore::NCHW, "NCHW"}, {mindspore::NHWC, "NHWC"},     {mindspore::NHWC4, "NHWC4"}, {mindspore::HWKC, "HWKC"},
  {mindspore::HWCK, "HWCK"}, {mindspore::KCHW, "KCHW"},     {mindspore::CKHW, "CKHW"},   {mindspore::KHWC, "KHWC"},
  {mindspore::CHWK, "CHWK"}, {mindspore::HW, "HW"},         {mindspore::HW4, "HW4"},     {mindspore::NC, "NC"},
  {mindspore::NC4, "NC4"},   {mindspore::NC4HW4, "NC4HW4"}, {mindspore::NCDHW, "NCDHW"}};

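// Fill `data` with random values of the given MindSpore data type. Floating-point types are drawn
// from the [kInputData*Min, kInputData*Max] ranges above and integer types from their fixed ranges;
// unknown types fall back to a repeating byte pattern, which treats `size` as a byte count.
// Returns RET_ERROR only when `data` is nullptr.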
int BenchmarkBase::GenerateRandomData(size_t size, void *data, int data_type) {
  if (data == nullptr) {
    return RET_ERROR;
  }
  if (size == 0) {
    return RET_OK;
  }
  switch (data_type) {
    case kNumberTypeFloat32:
    case kNumberTypeFloat:
      FillInputData<float>(size, data, std::uniform_real_distribution<float>(kInputDataFloatMin, kInputDataFloatMax));
      break;
    case kNumberTypeFloat64:
      FillInputData<double>(size, data,
                            std::uniform_real_distribution<double>(kInputDataDoubleMin, kInputDataDoubleMax));
      break;
    case kNumberTypeInt64:
      FillInputData<int64_t>(size, data,
                             std::uniform_int_distribution<int64_t>(kInputDataInt64Min, kInputDataInt64Max));
      break;
    case kNumberTypeInt:
    case kNumberTypeInt32:
      FillInputData<int32_t>(size, data,
                             std::uniform_int_distribution<int32_t>(kInputDataInt32Min, kInputDataInt32Max));
      break;
    case kNumberTypeInt16:
      FillInputData<int16_t>(size, data,
                             std::uniform_int_distribution<int16_t>(kInputDataInt16Min, kInputDataInt16Max));
      break;
    case kNumberTypeInt8:
      FillInputData<int8_t>(size, data, std::uniform_int_distribution<int16_t>(kInputDataInt8Min, kInputDataInt8Max));
      break;
    case kNumberTypeUInt8:
      FillInputData<uint8_t>(size, data,
                             std::uniform_int_distribution<uint16_t>(kInputDataUint8Min, kInputDataUint8Max));
      break;
    default:
      char *casted_data = static_cast<char *>(data);
      for (size_t i = 0; i < size; i++) {
        casted_data[i] = static_cast<char>(i);
      }
  }
  return RET_OK;
}

// calibData is FP32
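// Expected file layout, per tensor: a header line "name dim_count dim0 dim1 ..." followed by one
// line of space-separated float values (or, for string tensors, one line per element).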
int BenchmarkBase::ReadCalibData() {
  const char *calib_data_path = flags_->benchmark_data_file_.c_str();
  // read calib data
  std::ifstream in_file(calib_data_path);
  if (!in_file.good()) {
    std::cerr << "file: " << calib_data_path << " does not exist" << std::endl;
    MS_LOG(ERROR) << "file: " << calib_data_path << " does not exist";
    return RET_ERROR;
  }

  if (!in_file.is_open()) {
    std::cerr << "file: " << calib_data_path << " open failed" << std::endl;
    MS_LOG(ERROR) << "file: " << calib_data_path << " open failed";
    in_file.close();
    return RET_ERROR;
  }
  MS_LOG(INFO) << "Start reading calibData file";
  std::string line;
  std::string tensor_name;

  while (getline(in_file, line)) {
    std::stringstream string_line1(line);
    size_t dim = 0;
    string_line1 >> tensor_name >> dim;
    std::vector<size_t> dims;
    for (size_t i = 0; i < dim; i++) {
      size_t tmp_dim;
      string_line1 >> tmp_dim;
      dims.push_back(tmp_dim);
    }
    auto ret = ReadTensorData(in_file, tensor_name, dims);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Read tensor data failed, tensor name: " << tensor_name;
      in_file.close();
      return RET_ERROR;
    }
  }
  in_file.close();
  MS_LOG(INFO) << "Finish reading calibData file";
  return RET_OK;
}

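// Parse one tensor entry from the calibration stream: consumes the data line that follows the
// header, converts it according to the tensor's data type, and stores it in benchmark_data_.
// Entries whose name is already present are skipped (their data line is still consumed).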
int BenchmarkBase::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
                                  const std::vector<size_t> &dims) {
  std::string line;
  (void)getline(in_file_stream, line);
  std::stringstream line_stream(line);
  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
    return RET_OK;
  }
  std::vector<float> data;
  std::vector<std::string> strings_data;
  size_t shape_size = 1;
  if (!dims.empty()) {
    for (size_t i = 0; i < dims.size(); ++i) {
      if (dims[i] == 0) {
        MS_LOG(ERROR) << "dim is 0.";
        return RET_ERROR;
      }
      MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(shape_size, dims[i]), RET_ERROR, "mul overflow");
      shape_size *= dims[i];
    }
  }
  auto tensor_data_type = GetDataTypeByTensorName(tensor_name);
  if (tensor_data_type == static_cast<int>(kTypeUnknown)) {
    MS_LOG(ERROR) << "get data type failed.";
    return RET_ERROR;
  }
  if (tensor_data_type == static_cast<int>(kObjectTypeString)) {
    strings_data.push_back(line);
    for (size_t i = 1; i < shape_size; i++) {
      getline(in_file_stream, line);
      strings_data.push_back(line);
    }
  } else {
    for (size_t i = 0; i < shape_size; i++) {
      float tmp_data;
      line_stream >> tmp_data;
      data.push_back(tmp_data);
    }
  }
  auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data);
  if (check_tensor == nullptr) {
    MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name;
    return RET_ERROR;
  }
  this->benchmark_tensor_names_.push_back(tensor_name);
  this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor));
  return RET_OK;
}

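// Element-wise comparison for string tensors: prints the first few output strings and fails on the
// first mismatch between calibration and output data.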
int BenchmarkBase::CompareStringData(const std::string &name, const std::vector<std::string> &calib_strings,
                                     const std::vector<std::string> &output_strings) {
  size_t compare_num = std::min(calib_strings.size(), output_strings.size());
  size_t print_num = std::min(compare_num, static_cast<size_t>(kNumPrintMin));

  std::cout << "Data of node " << name << " : " << std::endl;
  for (size_t i = 0; i < compare_num; i++) {
    if (i < print_num) {
      std::cout << " " << output_strings[i] << std::endl;
    }
    if (calib_strings[i] != output_strings[i]) {
      MS_LOG(ERROR) << "Compare failed, index: " << i;
      std::cerr << "Compare failed, index: " << i << std::endl;
      return RET_ERROR;
    }
  }
  return RET_OK;
}

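// Split the input-data file argument (in_data_file_) into individual file paths on commas and/or
// whitespace.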
void BenchmarkFlags::InitInputDataList() {
  if (in_data_file_.empty()) {
    input_data_list_ = {};
    return;
  }
  std::regex re{"[\\s,]+"};
  input_data_list_ = std::vector<std::string>{
    std::sregex_token_iterator(in_data_file_.begin(), in_data_file_.end(), re, -1), std::sregex_token_iterator()};
}

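// Parse an input-shape string of the form "inTensor1:1,32,32,32;inTensor2:1,1,32,32,4" into a
// name -> shape map. Returns false for a malformed string, a duplicate tensor name, or an empty
// result.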
bool ParseResizeDimsStringV2(const std::string &input_string,
                             std::map<std::string, std::vector<int64_t>> *graph_input_shape_map) {
  // define regex pattern that matches "inTensor1:1,32,32,32;inTensor2:1,1,32,32,4"
  std::regex pattern("^(\\w+:\\d+(?:,\\d+)*)(?:;(\\w+:\\d+(?:,\\d+)*))*;?$");
  std::vector<int64_t> shape;
  if (std::regex_match(input_string, pattern)) {
    auto group_splits = lite::StrSplit(input_string, std::string(";"));
    for (auto group : group_splits) {
      if (group.empty()) {
        continue;
      }
      shape.clear();
      auto string_split = lite::StrSplit(group, std::string(":"));
      auto name = string_split[0];
      for (size_t i = 1; i < string_split.size() - 1; ++i) {
        name += ":" + string_split[i];
      }
      auto dim_strs = string_split[string_split.size() - 1];
      auto dims = lite::StrSplit(dim_strs, std::string(","));
      for (const auto &dim : dims) {
        int64_t dim_value;
        // try {
        dim_value = std::stoi(dim);
        // } catch (const std::exception &e) {
        //   MS_LOG(ERROR) << "Get dim failed: " << e.what();
        //   return false;
        // }
        shape.push_back(dim_value);
      }
      if ((*graph_input_shape_map).find(name) != (*graph_input_shape_map).end()) {
        MS_LOG(ERROR) << "The input shape string is not valid, found duplicate name [" << name << "].";
        return false;
      }
      (*graph_input_shape_map)[name] = shape;
    }
  } else {
    MS_LOG(ERROR) << "The input shape string is not valid, please specify input names followed by their shapes. Wrap "
                     "the whole string in double quotes (\"\"). e.g. "
                     "\"inTensor1:1,32,32,32;inTensor2:1,1,32,32,4\"";
    return false;
  }
  return !(*graph_input_shape_map).empty();
}

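// Build resize_dims_ from the command line. The newer --inputShape flag (name:dims pairs) takes
// precedence; the deprecated --inputShapes flag (colon-separated shapes without names) is still
// accepted as a fallback. Shapes parsed from --inputShape are stored in name-sorted (std::map)
// order.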
int BenchmarkFlags::InitResizeDimsList() {
  std::string content = this->resize_dims_in_;
  std::string content_v2 = this->resize_dims_in_v2_;
  if (!content_v2.empty()) {  // use newer flag "--inputShape" first
    std::map<std::string, std::vector<int64_t>> graph_input_shape_map;
    bool success = ParseResizeDimsStringV2(content_v2, &graph_input_shape_map);
    if (!success) {
      MS_LOG(ERROR) << "Parse inputShape string \"" << content_v2 << "\" failed, please check input.";
      return RET_PARAM_INVALID;
    }
    this->graph_input_shape_map_ = graph_input_shape_map;
    std::vector<int> shape;
    for (const auto &group : graph_input_shape_map) {
      shape.clear();
      std::cout << "Resize Dims: ";
      for (int number : group.second) {
        std::cout << number << " ";
        shape.emplace_back(number);
      }
      std::cout << std::endl;
      this->resize_dims_.emplace_back(shape);
    }
  } else {
    if (content.empty()) {
      return RET_OK;
    }
    MS_LOG(WARNING) << "The --inputShapes flag has been deprecated, the replacement flag is --inputShape. Please "
                       "update your command line usage.";
    std::vector<int> shape;
    auto shape_strs = StrSplit(content, std::string(DELIM_COLON));
    for (const auto &shape_str : shape_strs) {
      shape.clear();
      auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA));
      std::cout << "Resize Dims: ";
      for (const auto &dim_str : dim_strs) {
        std::cout << dim_str << " ";
        shape.emplace_back(static_cast<int>(std::stoi(dim_str)));
      }
      std::cout << std::endl;
      this->resize_dims_.emplace_back(shape);
    }
  }
  return RET_OK;
}

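// Parse the comma-separated list of CPU core ids into core_list_.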
void BenchmarkFlags::InitCoreList() {
  std::string core_list_str = this->core_list_str_;
  if (core_list_str.empty()) {
    return;
  }
  auto core_ids = StrSplit(core_list_str, std::string(DELIM_COMMA));
  std::cout << "core list: ";
  for (const auto &core_id : core_ids) {
    std::cout << core_id << " ";
    this->core_list_.emplace_back(static_cast<int>(std::stoi(core_id)));
  }
  std::cout << std::endl;
}

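// Validate model-related flags: derive in_data_type_ from in_data_type_in_, require a known
// calibration data type (if one is given), a non-empty model path, and a model type listed in
// ModelTypeMap.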
int BenchmarkBase::CheckModelValid() {
  this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary;

  if (!flags_->benchmark_data_type_.empty()) {
    if (data_type_map_.find(flags_->benchmark_data_type_) == data_type_map_.end()) {
      MS_LOG(ERROR) << "CalibDataType not supported: " << flags_->benchmark_data_type_.c_str();
      return RET_ERROR;
    }
    msCalibDataType = data_type_map_.at(flags_->benchmark_data_type_);
    MS_LOG(INFO) << "CalibDataType = " << flags_->benchmark_data_type_.c_str();
    std::cout << "CalibDataType = " << flags_->benchmark_data_type_.c_str() << std::endl;
  }

  if (flags_->model_file_.empty()) {
    MS_LOG(ERROR) << "modelPath is required";
    std::cerr << "modelPath is required" << std::endl;
    return RET_ERROR;
  }

  if (ModelTypeMap.find(flags_->model_type_) == ModelTypeMap.end()) {
    MS_LOG(ERROR) << "Invalid model type: " << flags_->model_type_;
    std::cerr << "Invalid model type: " << flags_->model_type_ << std::endl;
    return RET_ERROR;
  }
  return RET_OK;
}

int BenchmarkBase::CheckThreadNumValid() {
  if (this->flags_->num_threads_ < kThreadNumMin) {
    MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0";
    std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl;
    return RET_ERROR;
  }

  if (flags_->enable_parallel_) {
    if (flags_->num_threads_ < kParallelThreadNumMin) {
      MS_LOG(ERROR) << "enableParallel requires more than one thread.";
      std::cerr << "enableParallel requires more than one thread." << std::endl;
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int BenchmarkBase::CheckDeviceTypeValid() {
  if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU" &&
      flags_->device_ != "Ascend" && flags_->device_ != "NNRT") {
    MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported.";
    std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl;
    return RET_ERROR;
  }
  return RET_OK;
}

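// Load the tensor-dump configuration referenced by the MINDSPORE_DUMP_CONFIG environment variable.
// "common_dump_settings", "dump_mode" and "path" are required; "net_name" and "input_output" get
// defaults. A minimal sketch of such a file (the last three key names are assumed to match the
// dump:: constants used below):
//   {
//     "common_dump_settings": {
//       "dump_mode": 0,
//       "path": "/tmp/dump",
//       "net_name": "default",
//       "input_output": 0,
//       "kernels": []
//     }
//   }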
int BenchmarkBase::InitDumpConfigFromJson(const char *path) {
#ifndef BENCHMARK_CLIP_JSON
  auto real_path = RealPath(path);
  std::ifstream ifs(real_path);
  if (!ifs.good()) {
    MS_LOG(ERROR) << "file: " << real_path << " does not exist";
    return RET_ERROR;
  }
  if (!ifs.is_open()) {
    MS_LOG(ERROR) << "file: " << real_path << " open failed";
    return RET_ERROR;
  }

  // try {
  dump_cfg_json_ = nlohmann::json::parse(ifs);
  // } catch (const nlohmann::json::parse_error &error) {
  //   MS_LOG(ERROR) << "parse json file failed, please check your file.";
  //   return RET_ERROR;
  // }
  if (dump_cfg_json_[dump::kSettings] == nullptr) {
    MS_LOG(ERROR) << "\"common_dump_settings\" is required.";
    return RET_ERROR;
  }
  if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) {
    MS_LOG(ERROR) << "\"dump_mode\" is required.";
    return RET_ERROR;
  }
  if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) {
    MS_LOG(ERROR) << "\"path\" is required.";
    return RET_ERROR;
  }
  if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) {
    dump_cfg_json_[dump::kSettings][dump::kNetName] = "default";
  }
  if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) {
    dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0;
  }
  if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr &&
      !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) {
    if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) {
      MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)";
      return RET_ERROR;
    }
  }

  auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get<std::string>();
  auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get<std::string>();
  if (abs_path.back() == '\\' || abs_path.back() == '/') {
    dump_file_output_dir_ = abs_path + net_name;
  } else {
#ifdef _WIN32
    dump_file_output_dir_ = abs_path + "\\" + net_name;
#else
    dump_file_output_dir_ = abs_path + "/" + net_name;
#endif
  }

  auto status = CreateOutputDir(&dump_file_output_dir_);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "create data output directory failed.";
    return RET_ERROR;
  }
#endif
  return RET_OK;
}

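// Install at most one callback set, chosen by priority: time profiling, then perf profiling, then
// tensor printing, then tensor dumping.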
int BenchmarkBase::InitCallbackParameter() {
  int ret = RET_OK;
  if (flags_->time_profiling_) {
    ret = InitTimeProfilingCallbackParameter();
  } else if (flags_->perf_profiling_) {
    ret = InitPerfProfilingCallbackParameter();
  } else if (flags_->print_tensor_data_) {
    ret = InitPrintTensorDataCallbackParameter();
  } else if (flags_->dump_tensor_data_) {
    ret = InitDumpTensorDataCallbackParameter();
  }
  return ret;
}

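// Validate and echo all benchmark flags, initialize the input/core/resize-dims lists, and set up
// dump and profiling callbacks. Returns RET_ERROR on the first invalid flag combination.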
int BenchmarkBase::Init() {
  MS_CHECK_FALSE(this->flags_ == nullptr, RET_ERROR);
  MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_;
  MS_LOG(INFO) << "ModelType = " << this->flags_->model_type_;
  MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_;
  MS_LOG(INFO) << "GroupInfoFile = " << this->flags_->group_info_file_;
  MS_LOG(INFO) << "ConfigFilePath = " << this->flags_->config_file_;
  MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_;
  MS_LOG(INFO) << "LoopCount = " << this->flags_->loop_count_;
  MS_LOG(INFO) << "DeviceType = " << this->flags_->device_;
  MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_;
  MS_LOG(INFO) << "CosineDistanceThreshold = " << this->flags_->cosine_distance_threshold_;
  MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_;
  MS_LOG(INFO) << "PerformanceMode = " << this->flags_->nnrt_performance_mode_;
  MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_;
  MS_LOG(INFO) << "InterOpParallelNum = " << this->flags_->inter_op_parallel_num_;
  MS_LOG(INFO) << "Fp16Priority = " << this->flags_->enable_fp16_;
  MS_LOG(INFO) << "EnableParallel = " << this->flags_->enable_parallel_;
  MS_LOG(INFO) << "calibDataPath = " << this->flags_->benchmark_data_file_;
  MS_LOG(INFO) << "EnableGLTexture = " << this->flags_->enable_gl_texture_;

  std::cout << "ModelPath = " << this->flags_->model_file_ << std::endl;
  std::cout << "ModelType = " << this->flags_->model_type_ << std::endl;
  std::cout << "InDataPath = " << this->flags_->in_data_file_ << std::endl;
  std::cout << "GroupInfoFile = " << this->flags_->group_info_file_ << std::endl;
  std::cout << "ConfigFilePath = " << this->flags_->config_file_ << std::endl;
  std::cout << "InDataType = " << this->flags_->in_data_type_in_ << std::endl;
  std::cout << "LoopCount = " << this->flags_->loop_count_ << std::endl;
  std::cout << "DeviceType = " << this->flags_->device_ << std::endl;
  std::cout << "AccuracyThreshold = " << this->flags_->accuracy_threshold_ << std::endl;
  std::cout << "CosineDistanceThreshold = " << this->flags_->cosine_distance_threshold_ << std::endl;
  std::cout << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_ << std::endl;
  std::cout << "PerformanceMode = " << this->flags_->nnrt_performance_mode_ << std::endl;
  std::cout << "NumThreads = " << this->flags_->num_threads_ << std::endl;
  std::cout << "InterOpParallelNum = " << this->flags_->inter_op_parallel_num_ << std::endl;
  std::cout << "Fp16Priority = " << this->flags_->enable_fp16_ << std::endl;
  std::cout << "EnableParallel = " << this->flags_->enable_parallel_ << std::endl;
  std::cout << "calibDataPath = " << this->flags_->benchmark_data_file_ << std::endl;
  std::cout << "EnableGLTexture = " << this->flags_->enable_gl_texture_ << std::endl;
  if (this->flags_->loop_count_ < 1) {
    MS_LOG(ERROR) << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0";
    std::cerr << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0" << std::endl;
    return RET_ERROR;
  }

  if (this->flags_->enable_gl_texture_ && this->flags_->device_ != "GPU") {
    MS_LOG(ERROR) << "device must be GPU if you want to enable GLTexture";
    std::cerr << "ERROR: device must be GPU if you want to enable GLTexture" << std::endl;
    return RET_ERROR;
  }

  auto thread_ret = CheckThreadNumValid();
  if (thread_ret != RET_OK) {
    MS_LOG(ERROR) << "Invalid numThreads.";
    std::cerr << "Invalid numThreads." << std::endl;
    return RET_ERROR;
  }

  static std::vector<std::string> CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"};
  if (this->flags_->cpu_bind_mode_ >= 1) {
    MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_];
    std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl;
  } else {
    MS_LOG(INFO) << "cpuBindMode = NO_BIND";
    std::cout << "cpuBindMode = NO_BIND" << std::endl;
  }

  auto model_ret = CheckModelValid();
  if (model_ret != RET_OK) {
    MS_LOG(ERROR) << "Invalid Model File.";
    std::cerr << "Invalid Model File." << std::endl;
    return RET_ERROR;
  }

  flags_->InitInputDataList();
  flags_->InitCoreList();
  MS_CHECK_FALSE_MSG(flags_->InitResizeDimsList() != RET_OK, RET_ERROR, "init resize dims failed.");
  if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() &&
      flags_->resize_dims_.size() != flags_->input_data_list_.size()) {
    MS_LOG(ERROR) << "The number of resizeDims entries should equal the number of inDataPath entries";
    std::cerr << "The number of resizeDims entries should equal the number of inDataPath entries" << std::endl;
    return RET_ERROR;
  }

  if (CheckDeviceTypeValid() != RET_OK) {
    MS_LOG(ERROR) << "Device type is invalid.";
    return RET_ERROR;
  }

  if (flags_->time_profiling_ && flags_->perf_profiling_) {
    MS_LOG(INFO) << "time_profiling is enabled, will not run perf_profiling.";
  }

  // get dump data output path
  auto dump_cfg_path = std::getenv(dump::kConfigPath);
  if (dump_cfg_path != nullptr) {
    flags_->dump_tensor_data_ = true;
    if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) {
      MS_LOG(ERROR) << "parse dump config file failed.";
      return RET_ERROR;
    }
  } else {
    MS_LOG(INFO) << "MINDSPORE_DUMP_CONFIG is not set in env, no need to dump data";
  }

  auto status = InitCallbackParameter();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Init callback Parameter failed.";
    std::cerr << "Init callback Parameter failed." << std::endl;
    return RET_ERROR;
  }

  return RET_OK;
}

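// Print the per-op profiling table. For each entry the columns are: name, average cost per loop,
// share of op_cost_total_, call count, and total cost; column widths grow to fit the data.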
int BenchmarkBase::PrintResult(const std::vector<std::string> &title,
                               const std::map<std::string, std::pair<int, float>> &result) {
  std::vector<size_t> columnLenMax(kPrintColNum);
  std::vector<std::vector<std::string>> rows;

  for (auto &iter : result) {
    char stringBuf[kPrintColNum][kPrintRowLenMax] = {};
    std::vector<std::string> columns;
    size_t len = 0;
    int index = 0;
    len = iter.first.size();
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.push_back(iter.first);

    index++;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f",
                   iter.second.second / static_cast<float>(flags_->loop_count_));
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);

    index++;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second / op_cost_total_);
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);

    index++;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%d", iter.second.first);
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);

    index++;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second);
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);

    rows.push_back(columns);
  }

  printf("-------------------------------------------------------------------------\n");
  for (int i = 0; i < kPrintColNum; i++) {
    auto printBuf = title[i];
    if (printBuf.size() > columnLenMax.at(i)) {
      columnLenMax.at(i) = printBuf.size();
    }
    printBuf.resize(columnLenMax.at(i), ' ');
    printf("%s\t", printBuf.c_str());
  }
  printf("\n");
  for (auto &row : rows) {
    for (int j = 0; j < kPrintColNum; j++) {
      auto printBuf = row[j];
      printBuf.resize(columnLenMax.at(j), ' ');
      printf("%s\t", printBuf.c_str());
    }
    printf("\n");
  }
  return RET_OK;
}

#ifdef ENABLE_ARM64
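// ARM64-only variant of PrintResult: reports the two perf_event counters collected per op, each
// shown as a per-loop average (scaled by the thread count) and as a share of the corresponding
// total (op_cost_total_ / op_cost2_total_).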
int BenchmarkBase::PrintPerfResult(const std::vector<std::string> &title,
                                   const std::map<std::string, std::pair<int, struct PerfCount>> &result) {
  std::vector<size_t> columnLenMax(kPrintColNum);
  std::vector<std::vector<std::string>> rows;

  for (auto &iter : result) {
    char stringBuf[kPrintColNum][kPrintRowLenMax] = {};
    std::vector<std::string> columns;
    size_t len = 0;
    int index = 0;
    len = iter.first.size();
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.push_back(iter.first);
    index++;
    float tmp = float_t(flags_->num_threads_) * iter.second.second.value[0] / float_t(flags_->loop_count_) / kFloatMSEC;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%.2f", tmp);
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);
    index++;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second.value[0] / op_cost_total_);
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);

    index++;
    tmp = float_t(flags_->num_threads_) * iter.second.second.value[1] / float_t(flags_->loop_count_) / kFloatMSEC;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%.2f", tmp);
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);

    index++;
    len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second.value[1] / op_cost2_total_);
    if (len > columnLenMax.at(index)) {
      columnLenMax.at(index) = len + kColumnLen;
    }
    columns.emplace_back(stringBuf[index]);

    rows.push_back(columns);
  }

  printf("-------------------------------------------------------------------------\n");
  for (int i = 0; i < kPrintColNum; i++) {
    auto printBuf = title[i];
    if (printBuf.size() > columnLenMax.at(i)) {
      columnLenMax.at(i) = printBuf.size();
    }
    printBuf.resize(columnLenMax.at(i), ' ');
    printf("%s\t", printBuf.c_str());
  }
  printf("\n");
  for (auto &row : rows) {
    for (int j = 0; j < kPrintColNum; j++) {
      auto printBuf = row[j];
      printBuf.resize(columnLenMax.at(j), ' ');
      printf("%s\t", printBuf.c_str());
    }
    printf("\n");
  }
  return RET_OK;
}
#endif

#ifdef SUPPORT_NNIE
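// Bring up the HiSilicon MPP system for NNIE: tear down any previous SYS/VB state, configure a
// common video-buffer pool (one kNNIEBlkSize block), then initialize VB and SYS. If the VB
// teardown fails, only SYS is re-initialized.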
int SvpSysInit() {
  HI_S32 ret = HI_SUCCESS;
  VB_CONFIG_S struVbConf;
  ret = HI_MPI_SYS_Exit();
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_VB_Exit();
  if (ret != HI_SUCCESS) {
    MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
    ret = HI_MPI_SYS_Init();
    if (ret != HI_SUCCESS) {
      MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
      return RET_ERROR;
    }
    return RET_OK;
  }

  memset(&struVbConf, 0, sizeof(VB_CONFIG_S));
  struVbConf.u32MaxPoolCnt = kNNIEMaxPoolCnt;
  struVbConf.astCommPool[1].u64BlkSize = kNNIEBlkSize;
  struVbConf.astCommPool[1].u32BlkCnt = 1;

  ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConfig failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_VB_Init();
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_SYS_Init();
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
    return RET_ERROR;
  }

  return RET_OK;
}

int SvpSysExit() {
  HI_S32 ret = HI_SUCCESS;

  ret = HI_MPI_SYS_Exit();
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_VB_Exit();
  if (ret != HI_SUCCESS) {
    MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
    return RET_OK;
  }

  return RET_OK;
}
#endif

BenchmarkBase::~BenchmarkBase() {
  for (auto &iter : this->benchmark_data_) {
    iter.second->shape.clear();
    iter.second->data.clear();
    delete iter.second;
    iter.second = nullptr;
  }
  this->benchmark_data_.clear();
#ifdef SUPPORT_NNIE
  SvpSysExit();
#endif
}
}  // namespace lite
}  // namespace mindspore