1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "tools/benchmark/benchmark_base.h"
18 #include <cinttypes>
19 #include <algorithm>
20 #include <utility>
21 #include <regex>
22 #include <functional>
23 #include "schema/model_generated.h"
24 #include "src/common/common.h"
25 #include "src/tensor.h"
26 #ifdef ENABLE_ARM64
27 #include <linux/perf_event.h>
28 #include <sys/ioctl.h>
29 #include <asm/unistd.h>
30 #include <unistd.h>
31 #endif
32 #ifdef SUPPORT_NNIE
33 #include "include/hi_common.h"
34 #include "include/hi_comm_vb.h"
35 #include "include/mpi_sys.h"
36 #include "include/mpi_vb.h"
37 #endif
38
namespace mindspore {
namespace lite {
// Lower bounds for the thread-related command-line flags.
constexpr int kThreadNumMin = 1;
constexpr int kParallelThreadNumMin = 1;
// Result-table layout: per-column padding, number of columns, and the maximum
// length of one formatted cell (snprintf buffer size).
constexpr int kColumnLen = 4;
constexpr int kPrintColNum = 5;
constexpr int kPrintRowLenMax = 100;

// Value ranges used by GenerateRandomData() when filling model inputs,
// one min/max pair per supported data type.
constexpr float kInputDataFloatMin = 0.1f;
constexpr float kInputDataFloatMax = 1.0f;
constexpr double kInputDataDoubleMin = 0.1;
constexpr double kInputDataDoubleMax = 1.0;
constexpr int64_t kInputDataInt64Min = 0;
constexpr int64_t kInputDataInt64Max = 1;
constexpr int32_t kInputDataInt32Min = 0;
constexpr int32_t kInputDataInt32Max = 1;
constexpr int16_t kInputDataInt16Min = 0;
constexpr int16_t kInputDataInt16Max = 1;
// Int8/UInt8 bounds are stored as int16_t because uniform_int_distribution
// over char-sized types is undefined behavior.
constexpr int16_t kInputDataInt8Min = -127;
constexpr int16_t kInputDataInt8Max = 127;
constexpr int16_t kInputDataUint8Min = 0;
constexpr int16_t kInputDataUint8Max = 254;
#ifdef SUPPORT_NNIE
// NNIE video-buffer pool configuration used by SvpSysInit().
constexpr int kNNIEMaxPoolCnt = 2;
constexpr int kNNIEBlkSize = 768 * 576 * 2;
#endif

// Maps TypeId enum values to human-readable names for logging/printing.
const std::unordered_map<int, std::string> kTypeIdMap{
  {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"},
  {kNumberTypeInt8, "Int8"},       {kNumberTypeInt16, "Int16"},   {kNumberTypeInt, "Int32"},
  {kNumberTypeInt32, "Int32"},     {kNumberTypeUInt8, "UInt8"},   {kNumberTypeUInt16, "UInt16"},
  {kNumberTypeUInt, "UInt32"},     {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"},
  {kNumberTypeBool, "Bool"},       {kObjectTypeTensorType, "Tensor"}};

// Maps tensor Format enum values to human-readable names for logging/printing.
const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap{
  {mindspore::NCHW, "NCHW"}, {mindspore::NHWC, "NHWC"},     {mindspore::NHWC4, "NHWC4"}, {mindspore::HWKC, "HWKC"},
  {mindspore::HWCK, "HWCK"}, {mindspore::KCHW, "KCHW"},     {mindspore::CKHW, "CKHW"},   {mindspore::KHWC, "KHWC"},
  {mindspore::CHWK, "CHWK"}, {mindspore::HW, "HW"},         {mindspore::HW4, "HW4"},     {mindspore::NC, "NC"},
  {mindspore::NC4, "NC4"},   {mindspore::NC4HW4, "NC4HW4"}, {mindspore::NCDHW, "NCDHW"}};
78
GenerateRandomData(size_t size,void * data,int data_type)79 int BenchmarkBase::GenerateRandomData(size_t size, void *data, int data_type) {
80 if (data == nullptr) {
81 return RET_ERROR;
82 }
83 if (size == 0) {
84 return RET_OK;
85 }
86 switch (data_type) {
87 case kNumberTypeFloat32:
88 case kNumberTypeFloat:
89 FillInputData<float>(size, data, std::uniform_real_distribution<float>(kInputDataFloatMin, kInputDataFloatMax));
90 break;
91 case kNumberTypeFloat64:
92 FillInputData<double>(size, data,
93 std::uniform_real_distribution<double>(kInputDataDoubleMin, kInputDataDoubleMax));
94 break;
95 case kNumberTypeInt64:
96 FillInputData<int64_t>(size, data,
97 std::uniform_int_distribution<int64_t>(kInputDataInt64Min, kInputDataInt64Max));
98 break;
99 case kNumberTypeInt:
100 case kNumberTypeInt32:
101 FillInputData<int32_t>(size, data,
102 std::uniform_int_distribution<int32_t>(kInputDataInt32Min, kInputDataInt32Max));
103 break;
104 case kNumberTypeInt16:
105 FillInputData<int16_t>(size, data,
106 std::uniform_int_distribution<int16_t>(kInputDataInt16Min, kInputDataInt16Max));
107 break;
108 case kNumberTypeInt8:
109 FillInputData<int8_t>(size, data, std::uniform_int_distribution<int16_t>(kInputDataInt8Min, kInputDataInt8Max));
110 break;
111 case kNumberTypeUInt8:
112 FillInputData<uint8_t>(size, data,
113 std::uniform_int_distribution<uint16_t>(kInputDataUint8Min, kInputDataUint8Max));
114 break;
115 default:
116 char *casted_data = static_cast<char *>(data);
117 for (size_t i = 0; i < size; i++) {
118 casted_data[i] = static_cast<char>(i);
119 }
120 }
121 return RET_OK;
122 }
123
124 // calibData is FP32
ReadCalibData()125 int BenchmarkBase::ReadCalibData() {
126 const char *calib_data_path = flags_->benchmark_data_file_.c_str();
127 // read calib data
128 std::ifstream in_file(calib_data_path);
129 if (!in_file.good()) {
130 std::cerr << "file: " << calib_data_path << " is not exist" << std::endl;
131 MS_LOG(ERROR) << "file: " << calib_data_path << " is not exist";
132 return RET_ERROR;
133 }
134
135 if (!in_file.is_open()) {
136 std::cerr << "file: " << calib_data_path << " open failed" << std::endl;
137 MS_LOG(ERROR) << "file: " << calib_data_path << " open failed";
138 in_file.close();
139 return RET_ERROR;
140 }
141 MS_LOG(INFO) << "Start reading calibData file";
142 std::string line;
143 std::string tensor_name;
144
145 while (!in_file.eof()) {
146 (void)getline(in_file, line);
147 std::stringstream string_line1(line);
148 size_t dim = 0;
149 string_line1 >> tensor_name >> dim;
150 std::vector<size_t> dims;
151 for (size_t i = 0; i < dim; i++) {
152 size_t tmp_dim;
153 string_line1 >> tmp_dim;
154 dims.push_back(tmp_dim);
155 }
156 auto ret = ReadTensorData(in_file, tensor_name, dims);
157 if (ret != RET_OK) {
158 MS_LOG(ERROR) << "Read tensor data failed, tensor name: " << tensor_name;
159 in_file.close();
160 return RET_ERROR;
161 }
162 }
163 in_file.close();
164 MS_LOG(INFO) << "Finish reading calibData file";
165 return RET_OK;
166 }
167
// Reads one tensor's data from the calibration stream into benchmark_data_.
// Expects the stream to be positioned right after the tensor's header line:
// numeric tensors occupy one whitespace-separated line; string tensors occupy
// shape_size lines (one string per line). The parsed CheckTensor is owned by
// benchmark_data_ and freed in ~BenchmarkBase().
// NOTE(review): when the tensor name is already present, this returns after
// consuming only ONE line — for a multi-line string tensor that would leave
// the stream mid-record; presumably duplicate names don't occur in practice.
int BenchmarkBase::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
                                  const std::vector<size_t> &dims) {
  std::string line;
  // The data line must be consumed even if the tensor is skipped below,
  // to keep the caller's header/data line alternation in sync.
  (void)getline(in_file_stream, line);
  std::stringstream line_stream(line);
  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
    return RET_OK;
  }
  std::vector<float> data;
  std::vector<std::string> strings_data;
  // shape_size = product of dims; stays 1 for a scalar (empty dims).
  size_t shape_size = 1;
  if (!dims.empty()) {
    for (size_t i = 0; i < dims.size(); ++i) {
      if (dims[i] == 0) {
        MS_LOG(ERROR) << "dim is 0.";
        return RET_ERROR;
      }
      MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(shape_size, dims[i]), RET_ERROR, "mul overflow");
      shape_size *= dims[i];
    }
  }
  auto tensor_data_type = GetDataTypeByTensorName(tensor_name);
  if (tensor_data_type == static_cast<int>(kTypeUnknown)) {
    MS_LOG(ERROR) << "get data type failed.";
    return RET_ERROR;
  }
  if (tensor_data_type == static_cast<int>(kObjectTypeString)) {
    // String tensor: the line read above is element 0, the remaining
    // shape_size - 1 elements each occupy their own line.
    strings_data.push_back(line);
    for (size_t i = 1; i < shape_size; i++) {
      getline(in_file_stream, line);
      strings_data.push_back(line);
    }
  } else {
    // Numeric tensor: all values are whitespace-separated on the single
    // line already read; calibration values are always parsed as float.
    for (size_t i = 0; i < shape_size; i++) {
      float tmp_data;
      line_stream >> tmp_data;
      data.push_back(tmp_data);
    }
  }
  auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data);
  if (check_tensor == nullptr) {
    MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name;
    return RET_ERROR;
  }
  this->benchmark_tensor_names_.push_back(tensor_name);
  this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor));
  return RET_OK;
}
216
CompareStringData(const std::string & name,const std::vector<std::string> & calib_strings,const std::vector<std::string> & output_strings)217 int BenchmarkBase::CompareStringData(const std::string &name, const std::vector<std::string> &calib_strings,
218 const std::vector<std::string> &output_strings) {
219 size_t compare_num = std::min(calib_strings.size(), output_strings.size());
220 size_t print_num = std::min(compare_num, static_cast<size_t>(kNumPrintMin));
221
222 std::cout << "Data of node " << name << " : " << std::endl;
223 for (size_t i = 0; i < compare_num; i++) {
224 if (i < print_num) {
225 std::cout << " " << output_strings[i] << std::endl;
226 }
227 if (calib_strings[i] != output_strings[i]) {
228 MS_LOG(ERROR) << "Compare failed, index: " << i;
229 std::cerr << "Compare failed, index: " << i << std::endl;
230 return RET_ERROR;
231 }
232 }
233 return RET_OK;
234 }
235
InitInputDataList()236 void BenchmarkFlags::InitInputDataList() {
237 if (in_data_file_.empty()) {
238 input_data_list_ = {};
239 return;
240 }
241 std::regex re{"[\\s,]+"};
242 input_data_list_ = std::vector<std::string>{
243 std::sregex_token_iterator(in_data_file_.begin(), in_data_file_.end(), re, -1), std::sregex_token_iterator()};
244 }
245
ParseResizeDimsStringV2(const std::string & input_string,std::map<std::string,std::vector<int64_t>> * graph_input_shape_map)246 bool ParseResizeDimsStringV2(const std::string &input_string,
247 std::map<std::string, std::vector<int64_t>> *graph_input_shape_map) {
248 // define regex pattern that matches "inTensor1:1,32,32,32;inTensor2:1,1,32,32,4"
249 std::regex pattern("^(\\w+:\\d+(?:,\\d+)*)(?:;(\\w+:\\d+(?:,\\d+)*))*;?$");
250 std::vector<int64_t> shape;
251 if (std::regex_match(input_string, pattern)) {
252 auto group_splits = lite::StrSplit(input_string, std::string(";"));
253 for (auto group : group_splits) {
254 if (group.empty()) {
255 continue;
256 }
257 shape.clear();
258 auto string_split = lite::StrSplit(group, std::string(":"));
259 auto name = string_split[0];
260 for (size_t i = 1; i < string_split.size() - 1; ++i) {
261 name += ":" + string_split[i];
262 }
263 auto dim_strs = string_split[string_split.size() - 1];
264 auto dims = lite::StrSplit(dim_strs, std::string(","));
265 for (const auto &dim : dims) {
266 int64_t dim_value;
267 // try {
268 dim_value = std::stoi(dim);
269 // } catch (const std::exception &e) {
270 // MS_LOG(ERROR) << "Get dim failed: " << e.what();
271 // return false;
272 // }
273 shape.push_back(dim_value);
274 }
275 if ((*graph_input_shape_map).find(name) != (*graph_input_shape_map).end()) {
276 MS_LOG(ERROR) << "The input shape string is not valid, found duplicate name [" << name << "].";
277 return false;
278 }
279 (*graph_input_shape_map)[name] = shape;
280 }
281 } else {
282 MS_LOG(ERROR) << "The input shape string is not valid, please specify input names followed by their shapes. Wrap "
283 "the whole string in "
284 "double-quotes(\"\"). e.g. "
285 "\"inTensor1:1,32,32,32;inTensor2:1,1,32,32,4\"";
286 return false;
287 }
288 return !(*graph_input_shape_map).empty();
289 }
290
InitResizeDimsList()291 int BenchmarkFlags::InitResizeDimsList() {
292 std::string content = this->resize_dims_in_;
293 std::string content_v2 = this->resize_dims_in_v2_;
294 if (!content_v2.empty()) { // use newer flag "--inputShape" first
295 std::map<std::string, std::vector<int64_t>> graph_input_shape_map;
296 bool success = ParseResizeDimsStringV2(content_v2, &graph_input_shape_map);
297 if (!success) {
298 MS_LOG(ERROR) << "Parse inputShape string \"" << content_v2 << "\" failed, please check input.";
299 return RET_PARAM_INVALID;
300 }
301 this->graph_input_shape_map_ = graph_input_shape_map;
302 std::vector<int> shape;
303 for (const auto &group : graph_input_shape_map) {
304 shape.clear();
305 std::cout << "Resize Dims: ";
306 for (int number : group.second) {
307 std::cout << number << " ";
308 shape.emplace_back(number);
309 }
310 std::cout << std::endl;
311 this->resize_dims_.emplace_back(shape);
312 }
313 } else {
314 if (content.empty()) {
315 return RET_OK;
316 }
317 MS_LOG(WARNING) << "The --inputShapes flag has been deprecated, the replacement flag is --inputShape. Please "
318 "update your command line usage.";
319 std::vector<int> shape;
320 auto shape_strs = StrSplit(content, std::string(DELIM_COLON));
321 for (const auto &shape_str : shape_strs) {
322 shape.clear();
323 auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA));
324 std::cout << "Resize Dims: ";
325 for (const auto &dim_str : dim_strs) {
326 std::cout << dim_str << " ";
327 shape.emplace_back(static_cast<int>(std::stoi(dim_str)));
328 }
329 std::cout << std::endl;
330 this->resize_dims_.emplace_back(shape);
331 }
332 }
333 return RET_OK;
334 }
335
InitCoreList()336 void BenchmarkFlags::InitCoreList() {
337 std::string core_list_str = this->core_list_str_;
338 if (core_list_str.empty()) {
339 return;
340 }
341 auto core_ids = StrSplit(core_list_str, std::string(DELIM_COMMA));
342 std::cout << "core list: ";
343 for (const auto &core_id : core_ids) {
344 std::cout << core_id << " ";
345 this->core_list_.emplace_back(static_cast<int>(std::stoi(core_id)));
346 }
347 std::cout << std::endl;
348 }
349
CheckModelValid()350 int BenchmarkBase::CheckModelValid() {
351 this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary;
352
353 if (!flags_->benchmark_data_type_.empty()) {
354 if (data_type_map_.find(flags_->benchmark_data_type_) == data_type_map_.end()) {
355 MS_LOG(ERROR) << "CalibDataType not supported: " << flags_->benchmark_data_type_.c_str();
356 return RET_ERROR;
357 }
358 msCalibDataType = data_type_map_.at(flags_->benchmark_data_type_);
359 MS_LOG(INFO) << "CalibDataType = " << flags_->benchmark_data_type_.c_str();
360 std::cout << "CalibDataType = " << flags_->benchmark_data_type_.c_str() << std::endl;
361 }
362
363 if (flags_->model_file_.empty()) {
364 MS_LOG(ERROR) << "modelPath is required";
365 std::cerr << "modelPath is required" << std::endl;
366 return RET_ERROR;
367 }
368
369 if (ModelTypeMap.find(flags_->model_type_) == ModelTypeMap.end()) {
370 MS_LOG(ERROR) << "Invalid model type: " << flags_->model_type_;
371 std::cerr << "Invalid model type: " << flags_->model_type_ << std::endl;
372 return RET_ERROR;
373 }
374 return RET_OK;
375 }
376
CheckThreadNumValid()377 int BenchmarkBase::CheckThreadNumValid() {
378 if (this->flags_->num_threads_ < kThreadNumMin) {
379 MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0";
380 std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl;
381 return RET_ERROR;
382 }
383
384 if (flags_->enable_parallel_) {
385 if (flags_->num_threads_ < kParallelThreadNumMin) {
386 MS_LOG(ERROR) << "enable parallel need more than 1 thread.";
387 std::cerr << "enable parallel need more than 1 thread." << std::endl;
388 return RET_ERROR;
389 }
390 }
391 return RET_OK;
392 }
393
CheckDeviceTypeValid()394 int BenchmarkBase::CheckDeviceTypeValid() {
395 if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU" &&
396 flags_->device_ != "Ascend" && flags_->device_ != "NNRT") {
397 MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported.";
398 std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl;
399 return RET_ERROR;
400 }
401 return RET_OK;
402 }
403
InitDumpConfigFromJson(const char * path)404 int BenchmarkBase::InitDumpConfigFromJson(const char *path) {
405 #ifndef BENCHMARK_CLIP_JSON
406 auto real_path = RealPath(path);
407 std::ifstream ifs(real_path);
408 if (!ifs.good()) {
409 MS_LOG(ERROR) << "file: " << real_path << " is not exist";
410 return RET_ERROR;
411 }
412 if (!ifs.is_open()) {
413 MS_LOG(ERROR) << "file: " << real_path << " open failed";
414 return RET_ERROR;
415 }
416
417 // try {
418 dump_cfg_json_ = nlohmann::json::parse(ifs);
419 // } catch (const nlohmann::json::parse_error &error) {
420 // MS_LOG(ERROR) << "parse json file failed, please check your file.";
421 // return RET_ERROR;
422 // }
423 if (dump_cfg_json_[dump::kSettings] == nullptr) {
424 MS_LOG(ERROR) << "\"common_dump_settings\" is required.";
425 return RET_ERROR;
426 }
427 if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) {
428 MS_LOG(ERROR) << "\"dump_mode\" is required.";
429 return RET_ERROR;
430 }
431 if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) {
432 MS_LOG(ERROR) << "\"path\" is required.";
433 return RET_ERROR;
434 }
435 if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) {
436 dump_cfg_json_[dump::kSettings][dump::kNetName] = "default";
437 }
438 if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) {
439 dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0;
440 }
441 if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr &&
442 !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) {
443 if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) {
444 MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)";
445 return RET_ERROR;
446 }
447 }
448
449 auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get<std::string>();
450 auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get<std::string>();
451 if (abs_path.back() == '\\' || abs_path.back() == '/') {
452 dump_file_output_dir_ = abs_path + net_name;
453 } else {
454 #ifdef _WIN32
455 dump_file_output_dir_ = abs_path + "\\" + net_name;
456 #else
457 dump_file_output_dir_ = abs_path + "/" + net_name;
458 #endif
459 }
460
461 auto status = CreateOutputDir(&dump_file_output_dir_);
462 if (status != RET_OK) {
463 MS_LOG(ERROR) << "create data output directory failed.";
464 return RET_ERROR;
465 }
466 #endif
467 return RET_OK;
468 }
469
InitCallbackParameter()470 int BenchmarkBase::InitCallbackParameter() {
471 int ret = RET_OK;
472 if (flags_->time_profiling_) {
473 ret = InitTimeProfilingCallbackParameter();
474 } else if (flags_->perf_profiling_) {
475 ret = InitPerfProfilingCallbackParameter();
476 } else if (flags_->print_tensor_data_) {
477 ret = InitPrintTensorDataCallbackParameter();
478 } else if (flags_->dump_tensor_data_) {
479 ret = InitDumpTensorDataCallbackParameter();
480 }
481 return ret;
482 }
483
Init()484 int BenchmarkBase::Init() {
485 MS_CHECK_FALSE(this->flags_ == nullptr, RET_ERROR);
486 MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_;
487 MS_LOG(INFO) << "ModelType = " << this->flags_->model_type_;
488 MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_;
489 MS_LOG(INFO) << "GroupInfoFile = " << this->flags_->group_info_file_;
490 MS_LOG(INFO) << "ConfigFilePath = " << this->flags_->config_file_;
491 MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_;
492 MS_LOG(INFO) << "LoopCount = " << this->flags_->loop_count_;
493 MS_LOG(INFO) << "DeviceType = " << this->flags_->device_;
494 MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_;
495 MS_LOG(INFO) << "CosineDistanceThreshold = " << this->flags_->cosine_distance_threshold_;
496 MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_;
497 MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_;
498 MS_LOG(INFO) << "InterOpParallelNum = " << this->flags_->inter_op_parallel_num_;
499 MS_LOG(INFO) << "Fp16Priority = " << this->flags_->enable_fp16_;
500 MS_LOG(INFO) << "EnableParallel = " << this->flags_->enable_parallel_;
501 MS_LOG(INFO) << "calibDataPath = " << this->flags_->benchmark_data_file_;
502 MS_LOG(INFO) << "EnableGLTexture = " << this->flags_->enable_gl_texture_;
503
504 std::cout << "ModelPath = " << this->flags_->model_file_ << std::endl;
505 std::cout << "ModelType = " << this->flags_->model_type_ << std::endl;
506 std::cout << "InDataPath = " << this->flags_->in_data_file_ << std::endl;
507 std::cout << "GroupInfoFile = " << this->flags_->group_info_file_ << std::endl;
508 std::cout << "ConfigFilePath = " << this->flags_->config_file_ << std::endl;
509 std::cout << "InDataType = " << this->flags_->in_data_type_in_ << std::endl;
510 std::cout << "LoopCount = " << this->flags_->loop_count_ << std::endl;
511 std::cout << "DeviceType = " << this->flags_->device_ << std::endl;
512 std::cout << "AccuracyThreshold = " << this->flags_->accuracy_threshold_ << std::endl;
513 std::cout << "CosineDistanceThreshold = " << this->flags_->cosine_distance_threshold_ << std::endl;
514 std::cout << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_ << std::endl;
515 std::cout << "NumThreads = " << this->flags_->num_threads_ << std::endl;
516 std::cout << "InterOpParallelNum = " << this->flags_->inter_op_parallel_num_ << std::endl;
517 std::cout << "Fp16Priority = " << this->flags_->enable_fp16_ << std::endl;
518 std::cout << "EnableParallel = " << this->flags_->enable_parallel_ << std::endl;
519 std::cout << "calibDataPath = " << this->flags_->benchmark_data_file_ << std::endl;
520 std::cout << "EnableGLTexture = " << this->flags_->enable_gl_texture_ << std::endl;
521 if (this->flags_->loop_count_ < 1) {
522 MS_LOG(ERROR) << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0";
523 std::cerr << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0" << std::endl;
524 return RET_ERROR;
525 }
526
527 if (this->flags_->enable_gl_texture_ == true && this->flags_->device_ != "GPU") {
528 MS_LOG(ERROR) << "device must be GPU if you want to enable GLTexture";
529 std::cerr << "ERROR: device must be GPU if you want to enable GLTexture" << std::endl;
530 return RET_ERROR;
531 }
532
533 auto thread_ret = CheckThreadNumValid();
534 if (thread_ret != RET_OK) {
535 MS_LOG(ERROR) << "Invalid numThreads.";
536 std::cerr << "Invalid numThreads." << std::endl;
537 return RET_ERROR;
538 }
539
540 static std::vector<std::string> CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"};
541 if (this->flags_->cpu_bind_mode_ >= 1) {
542 MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_];
543 std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl;
544 } else {
545 MS_LOG(INFO) << "cpuBindMode = NO_BIND";
546 std::cout << "cpuBindMode = NO_BIND" << std::endl;
547 }
548
549 auto model_ret = CheckModelValid();
550 if (model_ret != RET_OK) {
551 MS_LOG(ERROR) << "Invalid Model File.";
552 std::cerr << "Invalid Model File." << std::endl;
553 return RET_ERROR;
554 }
555
556 flags_->InitInputDataList();
557 flags_->InitCoreList();
558 MS_CHECK_FALSE_MSG(flags_->InitResizeDimsList() != RET_OK, RET_ERROR, "init resize dims failed.");
559 if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() &&
560 flags_->resize_dims_.size() != flags_->input_data_list_.size()) {
561 MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath";
562 std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl;
563 return RET_ERROR;
564 }
565
566 if (CheckDeviceTypeValid() != RET_OK) {
567 MS_LOG(ERROR) << "Device type is invalid.";
568 return RET_ERROR;
569 }
570
571 if (flags_->time_profiling_ && flags_->perf_profiling_) {
572 MS_LOG(INFO) << "time_profiling is enabled, will not run perf_profiling.";
573 }
574
575 // get dump data output path
576 auto dump_cfg_path = std::getenv(dump::kConfigPath);
577 if (dump_cfg_path != nullptr) {
578 flags_->dump_tensor_data_ = true;
579 if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) {
580 MS_LOG(ERROR) << "parse dump config file failed.";
581 return RET_ERROR;
582 }
583 } else {
584 MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data";
585 }
586
587 auto status = InitCallbackParameter();
588 if (status != RET_OK) {
589 MS_LOG(ERROR) << "Init callback Parameter failed.";
590 std::cerr << "Init callback Parameter failed." << std::endl;
591 return RET_ERROR;
592 }
593
594 return RET_OK;
595 }
596
PrintResult(const std::vector<std::string> & title,const std::map<std::string,std::pair<int,float>> & result)597 int BenchmarkBase::PrintResult(const std::vector<std::string> &title,
598 const std::map<std::string, std::pair<int, float>> &result) {
599 std::vector<size_t> columnLenMax(kPrintColNum);
600 std::vector<std::vector<std::string>> rows;
601
602 for (auto &iter : result) {
603 char stringBuf[kPrintColNum][kPrintRowLenMax] = {};
604 std::vector<std::string> columns;
605 size_t len = 0;
606 int index = 0;
607 len = iter.first.size();
608 if (len > columnLenMax.at(index)) {
609 columnLenMax.at(index) = len + kColumnLen;
610 }
611 columns.push_back(iter.first);
612
613 index++;
614 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f",
615 iter.second.second / static_cast<float>(flags_->loop_count_));
616 if (len > columnLenMax.at(index)) {
617 columnLenMax.at(index) = len + kColumnLen;
618 }
619 columns.emplace_back(stringBuf[index]);
620
621 index++;
622 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second / op_cost_total_);
623 if (len > columnLenMax.at(index)) {
624 columnLenMax.at(index) = len + kColumnLen;
625 }
626 columns.emplace_back(stringBuf[index]);
627
628 index++;
629 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%d", iter.second.first);
630 if (len > columnLenMax.at(index)) {
631 columnLenMax.at(index) = len + kColumnLen;
632 }
633 columns.emplace_back(stringBuf[index]);
634
635 index++;
636 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second);
637 if (len > columnLenMax.at(index)) {
638 columnLenMax.at(index) = len + kColumnLen;
639 }
640 columns.emplace_back(stringBuf[index]);
641
642 rows.push_back(columns);
643 }
644
645 printf("-------------------------------------------------------------------------\n");
646 for (int i = 0; i < kPrintColNum; i++) {
647 auto printBuf = title[i];
648 if (printBuf.size() > columnLenMax.at(i)) {
649 columnLenMax.at(i) = printBuf.size();
650 }
651 printBuf.resize(columnLenMax.at(i), ' ');
652 printf("%s\t", printBuf.c_str());
653 }
654 printf("\n");
655 for (auto &row : rows) {
656 for (int j = 0; j < kPrintColNum; j++) {
657 auto printBuf = row[j];
658 printBuf.resize(columnLenMax.at(j), ' ');
659 printf("%s\t", printBuf.c_str());
660 }
661 printf("\n");
662 }
663 return RET_OK;
664 }
665
666 #ifdef ENABLE_ARM64
PrintPerfResult(const std::vector<std::string> & title,const std::map<std::string,std::pair<int,struct PerfCount>> & result)667 int BenchmarkBase::PrintPerfResult(const std::vector<std::string> &title,
668 const std::map<std::string, std::pair<int, struct PerfCount>> &result) {
669 std::vector<size_t> columnLenMax(kPrintColNum);
670 std::vector<std::vector<std::string>> rows;
671
672 for (auto &iter : result) {
673 char stringBuf[kPrintColNum][kPrintRowLenMax] = {};
674 std::vector<std::string> columns;
675 size_t len = 0;
676 int index = 0;
677 len = iter.first.size();
678 if (len > columnLenMax.at(index)) {
679 columnLenMax.at(index) = len + kColumnLen;
680 }
681 columns.push_back(iter.first);
682 index++;
683 float tmp = float_t(flags_->num_threads_) * iter.second.second.value[0] / float_t(flags_->loop_count_) / kFloatMSEC;
684 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%.2f", tmp);
685 if (len > columnLenMax.at(index)) {
686 columnLenMax.at(index) = len + kColumnLen;
687 }
688 columns.emplace_back(stringBuf[index]);
689 index++;
690 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second.value[0] / op_cost_total_);
691 if (len > columnLenMax.at(index)) {
692 columnLenMax.at(index) = len + kColumnLen;
693 }
694 columns.emplace_back(stringBuf[index]);
695
696 index++;
697 tmp = float_t(flags_->num_threads_) * iter.second.second.value[1] / float_t(flags_->loop_count_) / kFloatMSEC;
698 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%.2f", tmp);
699 if (len > columnLenMax.at(index)) {
700 columnLenMax.at(index) = len + kColumnLen;
701 }
702 columns.emplace_back(stringBuf[index]);
703
704 index++;
705 len = snprintf(stringBuf[index], sizeof(stringBuf[index]), "%f", iter.second.second.value[1] / op_cost2_total_);
706 if (len > columnLenMax.at(index)) {
707 columnLenMax.at(index) = len + kColumnLen;
708 }
709 columns.emplace_back(stringBuf[index]);
710
711 rows.push_back(columns);
712 }
713
714 printf("-------------------------------------------------------------------------\n");
715 for (int i = 0; i < kPrintColNum; i++) {
716 auto printBuf = title[i];
717 if (printBuf.size() > columnLenMax.at(i)) {
718 columnLenMax.at(i) = printBuf.size();
719 }
720 printBuf.resize(columnLenMax.at(i), ' ');
721 printf("%s\t", printBuf.c_str());
722 }
723 printf("\n");
724 for (auto &row : rows) {
725 for (int j = 0; j < kPrintColNum; j++) {
726 auto printBuf = row[j];
727 printBuf.resize(columnLenMax.at(j), ' ');
728 printf("%s\t", printBuf.c_str());
729 }
730 printf("\n");
731 }
732 return RET_OK;
733 }
734 #endif
735
736 #ifdef SUPPORT_NNIE
// Initializes the HiSilicon NNIE MPI system and video-buffer (VB) pools.
// First tears down any previous state; if VB was not initialized (VB_Exit
// fails), only the SYS module is (re)initialized and the VB pool setup is
// skipped. Returns RET_ERROR if any mandatory MPI call fails.
int SvpSysInit() {
  HI_S32 ret = HI_SUCCESS;
  VB_CONFIG_S struVbConf;
  // Tear down a possibly stale SYS state before re-initializing.
  ret = HI_MPI_SYS_Exit();
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_VB_Exit();
  if (ret != HI_SUCCESS) {
    // VB was never initialized: bring SYS back up and report success
    // without configuring VB pools.
    MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
    ret = HI_MPI_SYS_Init();
    if (ret != HI_SUCCESS) {
      MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
      return RET_ERROR;
    }
    return RET_OK;
  }

  // Configure the common VB pools, then bring VB and SYS back up (in that
  // order, as VB config must be applied before VB init).
  // NOTE(review): only astCommPool[1] is configured although u32MaxPoolCnt
  // is 2 — presumably pool 0 is intentionally left zeroed; confirm against
  // the NNIE SDK requirements.
  memset(&struVbConf, 0, sizeof(VB_CONFIG_S));
  struVbConf.u32MaxPoolCnt = kNNIEMaxPoolCnt;
  struVbConf.astCommPool[1].u64BlkSize = kNNIEBlkSize;
  struVbConf.astCommPool[1].u32BlkCnt = 1;

  ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_VB_Init();
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_SYS_Init();
  if (ret != HI_SUCCESS) {
    MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
    return RET_ERROR;
  }

  return RET_OK;
}
782
SvpSysExit()783 int SvpSysExit() {
784 HI_S32 ret = HI_SUCCESS;
785
786 ret = HI_MPI_SYS_Exit();
787 if (ret != HI_SUCCESS) {
788 MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
789 return RET_ERROR;
790 }
791
792 ret = HI_MPI_VB_Exit();
793 if (ret != HI_SUCCESS) {
794 MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
795 return RET_OK;
796 }
797
798 return RET_OK;
799 }
800 #endif
801
~BenchmarkBase()802 BenchmarkBase::~BenchmarkBase() {
803 for (auto &iter : this->benchmark_data_) {
804 iter.second->shape.clear();
805 iter.second->data.clear();
806 delete iter.second;
807 iter.second = nullptr;
808 }
809 this->benchmark_data_.clear();
810 #ifdef SUPPORT_NNIE
811 SvpSysExit();
812 #endif
813 }
814 } // namespace lite
815 } // namespace mindspore
816