1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_BASE_H_ 18 #define MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_BASE_H_ 19 20 #include <signal.h> 21 #include <random> 22 #include <unordered_map> 23 #include <fstream> 24 #include <iostream> 25 #include <map> 26 #include <cmath> 27 #include <string> 28 #include <vector> 29 #include <memory> 30 #include <cfloat> 31 #include <utility> 32 #include <mutex> 33 #ifndef BENCHMARK_CLIP_JSON 34 #include <nlohmann/json.hpp> 35 #endif 36 #include "include/model.h" 37 #include "include/api/types.h" 38 #include "include/api/format.h" 39 #include "tools/common/flag_parser.h" 40 #include "src/common/file_utils.h" 41 #include "src/common/utils.h" 42 #include "ir/dtype/type_id.h" 43 #include "schema/model_generated.h" 44 #include "nnacl/op_base.h" 45 46 namespace mindspore::lite { 47 #define BENCHMARK_LOG_ERROR(str) \ 48 do { \ 49 MS_LOG(ERROR) << str; \ 50 std::cerr << str << std::endl; \ 51 } while (0); 52 53 enum MS_API InDataType { kImage = 0, kBinary = 1 }; 54 55 enum MS_API AiModelDescription_Frequency { 56 AiModelDescription_Frequency_LOW = 1, 57 AiModelDescription_Frequency_MEDIUM = 2, 58 AiModelDescription_Frequency_HIGH = 3, 59 AiModelDescription_Frequency_EXTREME = 4 60 }; 61 62 enum MS_API DumpMode { DUMP_MODE_ALL = 0, DUMP_MODE_INPUT = 1, DUMP_MODE_OUTPUT = 2 }; 63 64 constexpr float kRelativeTolerance = 1e-5; 65 constexpr float kAbsoluteTolerance = 1e-8; 66 constexpr int CosineErrMaxVal = 2; 67 constexpr float kFloatMSEC = 1000.0f; 68 69 constexpr int kNumPrintMin = 5; 70 constexpr const char *DELIM_COLON = ":"; 71 constexpr const char *DELIM_COMMA = ","; 72 constexpr const char *DELIM_SLASH = "/"; 73 constexpr size_t kEncMaxLen = 16; 74 75 extern const std::unordered_map<int, std::string> kTypeIdMap; 76 extern const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap; 77 78 const std::unordered_map<std::string, mindspore::ModelType> ModelTypeMap{ 79 {"MindIR_Lite", mindspore::ModelType::kMindIR_Lite}, {"MindIR", mindspore::ModelType::kMindIR}}; 80 81 namespace dump { 82 constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; 83 constexpr auto kSettings = "common_dump_settings"; 84 constexpr auto kMode = "dump_mode"; 85 constexpr auto kPath = "path"; 86 constexpr auto kNetName = "net_name"; 87 constexpr auto kInputOutput = "input_output"; 88 constexpr auto kKernels = "kernels"; 89 } // namespace dump 90 91 #ifdef ENABLE_ARM64 92 struct PerfResult { 93 int64_t nr; 94 struct { 95 int64_t value; 96 int64_t id; 97 } values[2]; 98 }; 99 struct PerfCount { 100 int64_t value[2]; 101 }; 102 #endif 103 104 struct MS_API CheckTensor { 105 CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data, 106 const std::vector<std::string> &strings_data = {""}) { 107 this->shape = shape; 108 this->data = data; 109 this->strings_data = strings_data; 110 } 111 std::vector<size_t> shape; 112 std::vector<float> data; 113 std::vector<std::string> strings_data; 114 }; 115 116 class MS_API BenchmarkFlags : public virtual FlagParser { 117 public: BenchmarkFlags()118 BenchmarkFlags() { 119 // common 120 AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", ""); 121 AddFlag(&BenchmarkFlags::model_type_, "modelType", "Input model type. MindIR | MindIR_Lite", "MindIR"); 122 AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); 123 AddFlag(&BenchmarkFlags::group_info_file_, "GroupInfoFile", "Communication group info file", ""); 124 AddFlag(&BenchmarkFlags::config_file_, "configFile", "Config file", ""); 125 AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend | NNRT | Auto", "CPU"); 126 AddFlag(&BenchmarkFlags::provider_, "provider", "device provider litert | tensorrt | mindrt", "litert"); 127 AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU.", 1); 128 // MarkPerformance 129 AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10); 130 AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2); 131 AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false); 132 AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false); 133 AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3); 134 AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false); 135 AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling", 136 "Perf event profiling(only instructions statics enabled currently)", false); 137 AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE"); 138 // MarkAccuracy 139 AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", ""); 140 AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType", 141 "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT"); 142 AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); 143 AddFlag(&BenchmarkFlags::cosine_distance_threshold_, "cosineDistanceThreshold", "cosine distance threshold", -1.1); 144 AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes", 145 "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", ""); 146 AddFlag(&BenchmarkFlags::resize_dims_in_v2_, "inputShape", 147 "Shape of input data. Specify input names followed by their shapes. Wrap the whole string in " 148 "double-quotes(\"\"). e.g. " 149 "\"inTensor1:1,32,32,32;inTensor2:1,1,32,32,4\"", 150 ""); 151 #ifdef ENABLE_CLOUD_FUSION_INFERENCE 152 // Distributed Infer 153 AddFlag(&BenchmarkFlags::device_id_, "deviceId", "Set device id for distributed inference", -1); 154 AddFlag(&BenchmarkFlags::rank_id_, "rankId", "Set rank id for distributed inference", -1); 155 #endif 156 // Decrypt and Crypte 157 AddFlag(&BenchmarkFlags::decrypt_key_str_, "decryptKey", 158 "The key used to decrypt the file, expressed in hexadecimal characters. Only support AES-GCM and the key " 159 "length is 16.", 160 ""); 161 AddFlag(&BenchmarkFlags::crypto_lib_path_, "cryptoLibPath", "Pass the crypto library path.", ""); 162 AddFlag(&BenchmarkFlags::enable_parallel_predict_, "enableParallelPredict", "Enable model parallel : true | false", 163 false); 164 AddFlag(&BenchmarkFlags::parallel_num_, "parallelNum", "parallel num of parallel predict", 2); 165 AddFlag(&BenchmarkFlags::parallel_task_num_, "parallelTaskNum", 166 "parallel task num of parallel predict, unlimited number of tasks when the value is -1", 2); 167 AddFlag(&BenchmarkFlags::workers_num_, "workersNum", "works num of parallel predict", 2); 168 AddFlag(&BenchmarkFlags::core_list_str_, "cpuCoreList", "The core id of the bundled core, e.g. 0,1,2,3", ""); 169 AddFlag(&BenchmarkFlags::inter_op_parallel_num_, "interOpParallelNum", "parallel number of operators in predict", 170 1); 171 AddFlag(&BenchmarkFlags::enable_gl_texture_, "enableGLTexture", "Enable GlTexture2D", false); 172 AddFlag(&BenchmarkFlags::delegate_mode_, "delegateMode", "set the delegate mode: CoreML | NNAPI", ""); 173 AddFlag(&BenchmarkFlags::enable_shared_thread_pool_, "enableSharedThreadPool", "Enable shared thread pool", false); 174 AddFlag(&BenchmarkFlags::thread_num_limit_per_worker_, "threadNumLimitPerWorker", "thread num limit per worker ", 175 ""); 176 AddFlag(&BenchmarkFlags::thread_num_remaining_per_worker_, "threadNumRemainingPerWorker", 177 "thread num limit per worker ", ""); 178 } 179 180 ~BenchmarkFlags() override = default; 181 182 void InitInputDataList(); 183 184 int InitResizeDimsList(); 185 186 void InitCoreList(); 187 188 public: 189 // common 190 bool enable_parallel_predict_ = false; 191 int parallel_num_ = 2; 192 int parallel_task_num_ = 2; 193 int inter_op_parallel_num_ = 1; 194 int workers_num_ = 2; 195 std::string model_file_; 196 std::string in_data_file_; 197 std::string group_info_file_; 198 std::string config_file_; 199 std::string model_type_; 200 std::vector<std::string> input_data_list_; 201 InDataType in_data_type_ = kBinary; 202 std::string in_data_type_in_ = "bin"; 203 int cpu_bind_mode_ = 1; 204 std::string core_list_str_; 205 std::vector<int> core_list_; 206 // MarkPerformance 207 int loop_count_ = 10; 208 int num_threads_ = 2; 209 bool enable_fp16_ = false; 210 bool enable_gl_texture_ = false; 211 bool enable_parallel_ = false; 212 int warm_up_loop_count_ = 3; 213 // MarkAccuracy 214 std::string benchmark_data_file_; 215 std::string benchmark_data_type_ = "FLOAT"; 216 float accuracy_threshold_ = 0.5; 217 float cosine_distance_threshold_ = -1.1; 218 // Resize 219 std::string resize_dims_in_; 220 std::string resize_dims_in_v2_; 221 std::map<std::string, std::vector<int64_t>> graph_input_shape_map_; 222 std::vector<std::vector<int>> resize_dims_; 223 // Distributed Infer 224 int device_id_; 225 int rank_id_; 226 227 std::string device_ = "CPU"; 228 std::string provider_ = "litert"; 229 bool time_profiling_ = false; 230 bool perf_profiling_ = false; 231 std::string perf_event_ = "CYCLE"; 232 bool dump_tensor_data_ = false; 233 bool print_tensor_data_ = false; 234 std::string decrypt_key_str_; 235 std::string dec_mode_ = "AES-GCM"; 236 std::string crypto_lib_path_; 237 std::string delegate_mode_; 238 bool enable_shared_thread_pool_ = false; 239 std::string thread_num_limit_per_worker_; 240 std::string thread_num_remaining_per_worker_; 241 }; 242 243 class MS_API BenchmarkBase { 244 public: BenchmarkBase(BenchmarkFlags * flags)245 explicit BenchmarkBase(BenchmarkFlags *flags) : flags_(flags) {} 246 247 virtual ~BenchmarkBase(); 248 249 int Init(); 250 virtual int RunBenchmark() = 0; 251 252 protected: 253 virtual int LoadInput() = 0; 254 255 virtual int GenerateInputData() = 0; 256 257 int GenerateRandomData(size_t size, void *data, int data_type); 258 259 virtual int ReadInputFile() = 0; 260 261 int ReadCalibData(); 262 263 int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector<size_t> &dims); 264 265 virtual int GetDataTypeByTensorName(const std::string &tensor_name) = 0; 266 267 virtual int CompareOutput() = 0; 268 269 int CompareStringData(const std::string &name, const std::vector<std::string> &calib_strings, 270 const std::vector<std::string> &output_strings); 271 272 int InitDumpConfigFromJson(const char *path); 273 274 int InitCallbackParameter(); 275 276 virtual int InitTimeProfilingCallbackParameter() = 0; 277 278 virtual int InitPerfProfilingCallbackParameter() = 0; 279 280 virtual int InitDumpTensorDataCallbackParameter() = 0; 281 282 virtual int InitPrintTensorDataCallbackParameter() = 0; 283 284 int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result); 285 286 #ifdef ENABLE_ARM64 287 int PrintPerfResult(const std::vector<std::string> &title, 288 const std::map<std::string, std::pair<int, struct PerfCount>> &result); 289 #endif 290 291 // tensorData need to be converter first 292 template <typename T, typename ST> 293 float CompareData(const std::string &nodeName, const std::vector<ST> &msShape, const void *tensor_data, 294 float relativeTolerance = kRelativeTolerance, float absoluteTolerance = kAbsoluteTolerance) { 295 const T *msTensorData = static_cast<const T *>(tensor_data); 296 auto iter = this->benchmark_data_.find(nodeName); 297 if (iter != this->benchmark_data_.end()) { 298 std::vector<size_t> castedMSShape; 299 size_t shapeSize = 1; 300 for (ST dim : msShape) { 301 if (dim <= 0) { 302 MS_LOG(ERROR) << "Invalid shape."; 303 return RET_ERROR; 304 } 305 castedMSShape.push_back(dim); 306 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(shapeSize, static_cast<size_t>(dim)), RET_ERROR, "mul overflow"); 307 shapeSize *= static_cast<size_t>(dim); 308 } 309 310 CheckTensor *calibTensor = iter->second; 311 if (calibTensor == nullptr) { 312 MS_LOG(ERROR) << "calibTensor is nullptr."; 313 return RET_ERROR; 314 } 315 if (!CheckShapeValid(calibTensor->shape, castedMSShape)) { 316 std::ostringstream oss; 317 oss << "Shape of mslite output("; 318 for (auto dim : castedMSShape) { 319 oss << dim << ","; 320 } 321 oss << ") and shape source model output("; 322 for (auto dim : calibTensor->shape) { 323 oss << dim << ","; 324 } 325 oss << ") are different"; 326 std::cerr << oss.str() << std::endl; 327 MS_LOG(ERROR) << oss.str().c_str(); 328 return RET_ERROR; 329 } 330 if (calibTensor->data.size() != shapeSize) { 331 MS_LOG(ERROR) << "calibTensor data size " << calibTensor->data.size() << " is not equal to " << shapeSize; 332 return RET_ERROR; 333 } 334 size_t errorCount = 0; 335 float meanError = 0; 336 std::cout << "Data of node " << nodeName << " : "; 337 for (size_t j = 0; j < shapeSize; j++) { 338 if (j < 50) { 339 std::cout << static_cast<float>(msTensorData[j]) << " "; 340 } 341 342 if (std::is_same<T, float>::value && (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j]))) { 343 std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; 344 MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; 345 return RET_ERROR; 346 } 347 348 auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j)); 349 auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j)); 350 if (absoluteError > tolerance) { 351 if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) { 352 if (absoluteError > 1e-5) { 353 meanError += absoluteError; 354 errorCount++; 355 } else { 356 continue; 357 } 358 } else { 359 // just assume that atol = rtol 360 meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN); 361 errorCount++; 362 } 363 } 364 } 365 std::cout << std::endl; 366 if (meanError > 0.0f) { 367 meanError /= errorCount; 368 } 369 370 if (meanError <= 0.0000001) { 371 std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl; 372 } else { 373 std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl; 374 } 375 return meanError; 376 } else { 377 MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str(); 378 return RET_ERROR; 379 } 380 } 381 GetMeanError(double sum_a,double sum_b,double dot_sum,float * mean_error)382 void GetMeanError(double sum_a, double sum_b, double dot_sum, float *mean_error) { 383 if (fabs(sum_a) < DBL_EPSILON && fabs(sum_b) < FLT_EPSILON) { 384 *mean_error = 1; 385 } else if (fabs(sum_a * sum_b) < DBL_EPSILON) { 386 if (fabs(sum_a) < FLT_EPSILON || fabs(sum_b) < FLT_EPSILON) { 387 *mean_error = 1; 388 } else { 389 *mean_error = 0; 390 } 391 } else { 392 *mean_error = dot_sum / (sqrt(sum_a) * sqrt(sum_b)); 393 } 394 } 395 396 // tensorData need to be converter first 397 template <typename T, typename ST> CompareDatabyCosineDistance(const std::string & nodeName,const std::vector<ST> & msShape,const void * tensor_data,float * mean_error)398 int CompareDatabyCosineDistance(const std::string &nodeName, const std::vector<ST> &msShape, const void *tensor_data, 399 float *mean_error) { 400 if (mean_error == nullptr) { 401 MS_LOG(ERROR) << "mean_error is nullptr"; 402 return RET_ERROR; 403 } 404 if (tensor_data == nullptr) { 405 MS_LOG(ERROR) << "tensor_data is nullptr"; 406 return RET_ERROR; 407 } 408 const T *msTensorData = static_cast<const T *>(tensor_data); 409 auto iter = this->benchmark_data_.find(nodeName); 410 if (iter != this->benchmark_data_.end()) { 411 std::vector<size_t> castedMSShape; 412 size_t shapeSize = 1; 413 for (int64_t dim : msShape) { 414 if (dim <= 0) { 415 MS_LOG(ERROR) << "Invalid shape."; 416 return RET_ERROR; 417 } 418 castedMSShape.push_back(size_t(dim)); 419 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(shapeSize, static_cast<size_t>(dim)), RET_ERROR, "mul overflow"); 420 shapeSize *= static_cast<size_t>(dim); 421 } 422 423 CheckTensor *calibTensor = iter->second; 424 if (calibTensor == nullptr) { 425 MS_LOG(ERROR) << "calibTensor is nullptr."; 426 return RET_ERROR; 427 } 428 if (!CheckShapeValid(calibTensor->shape, castedMSShape)) { 429 std::ostringstream oss; 430 oss << "Shape of mslite output("; 431 for (auto dim : castedMSShape) { 432 oss << dim << ","; 433 } 434 oss << ") and shape source model output("; 435 for (auto dim : calibTensor->shape) { 436 oss << dim << ","; 437 } 438 oss << ") are different"; 439 std::cerr << oss.str() << std::endl; 440 MS_LOG(ERROR) << oss.str().c_str(); 441 return RET_ERROR; 442 } 443 if (calibTensor->data.size() != shapeSize) { 444 MS_LOG(ERROR) << "calibTensor data size " << calibTensor->data.size() << " is not equal to " << shapeSize; 445 return RET_ERROR; 446 } 447 448 double dot_sum = 0; 449 double sum_a = 0; 450 double sum_b = 0; 451 std::cout << "Data of node " << nodeName << " : "; 452 for (size_t j = 0; j < shapeSize; j++) { 453 if (j < 50) { 454 std::cout << static_cast<float>(msTensorData[j]) << " "; 455 } 456 457 if (std::is_same<T, float>::value && (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j]))) { 458 std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; 459 MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; 460 return RET_ERROR; 461 } 462 dot_sum += static_cast<double>(msTensorData[j]) * calibTensor->data.at(j); 463 sum_a += static_cast<double>(msTensorData[j]) * msTensorData[j]; 464 sum_b += static_cast<double>(calibTensor->data.at(j)) * calibTensor->data.at(j); 465 } 466 GetMeanError(sum_a, sum_b, dot_sum, mean_error); 467 std::cout << std::endl; 468 std::cout << "Mean cosine distance of node/tensor " << nodeName << " : " << (*mean_error) * 100 << "%" 469 << std::endl; 470 return RET_OK; 471 } else { 472 MS_LOG(ERROR) << "%s is not in Source Model output", nodeName.c_str(); 473 return RET_ERROR; 474 } 475 } 476 477 template <typename T, typename Distribution> FillInputData(size_t size,void * data,Distribution distribution)478 void FillInputData(size_t size, void *data, Distribution distribution) { 479 MS_ASSERT(data != nullptr); 480 size_t elements_num = size / sizeof(T); 481 (void)std::generate_n(static_cast<T *>(data), elements_num, 482 [&, this]() { return static_cast<T>(distribution(random_engine_)); }); 483 } 484 CheckShapeValid(const std::vector<size_t> & calib_output_shape,const std::vector<size_t> & real_output_shape)485 bool CheckShapeValid(const std::vector<size_t> &calib_output_shape, const std::vector<size_t> &real_output_shape) { 486 if (calib_output_shape == real_output_shape) { 487 return true; 488 } 489 // (1, 225) compare with (1, 225, 1, 1) return true 490 size_t min_size = 491 calib_output_shape.size() > real_output_shape.size() ? real_output_shape.size() : calib_output_shape.size(); 492 size_t i = 0; 493 for (i = 0; i < min_size; ++i) { 494 if (calib_output_shape[i] != real_output_shape[i]) { 495 return false; 496 } 497 } 498 while (i < calib_output_shape.size()) { 499 if (calib_output_shape[i] != 1) { 500 return false; 501 } 502 i++; 503 } 504 while (i < real_output_shape.size()) { 505 if (real_output_shape[i] != 1) { 506 return false; 507 } 508 i++; 509 } 510 return true; 511 } 512 513 int CheckThreadNumValid(); 514 515 int CheckModelValid(); 516 517 int CheckDeviceTypeValid(); 518 519 protected: 520 BenchmarkFlags *flags_; 521 std::vector<std::string> benchmark_tensor_names_; 522 std::unordered_map<std::string, CheckTensor *> benchmark_data_; 523 std::unordered_map<std::string, int> data_type_map_{ 524 {"FLOAT", kNumberTypeFloat}, {"INT8", kNumberTypeInt8}, {"INT32", kNumberTypeInt32}, {"UINT8", kNumberTypeUInt8}}; 525 int msCalibDataType = kNumberTypeFloat; 526 527 // callback parameters 528 uint64_t op_begin_ = 0; 529 int op_call_times_total_ = 0; 530 float op_cost_total_ = 0.0f; 531 std::map<std::string, std::pair<int, float>> op_times_by_type_; 532 std::map<std::string, std::pair<int, float>> op_times_by_name_; 533 std::map<std::string, float> op_start_times_by_name_; 534 std::mutex op_times_mutex_; 535 #ifndef BENCHMARK_CLIP_JSON 536 // dump data 537 nlohmann::json dump_cfg_json_; 538 #endif 539 std::string dump_file_output_dir_; 540 #ifdef ENABLE_ARM64 541 int perf_fd = 0; 542 int perf_fd2 = 0; 543 float op_cost2_total_ = 0.0f; 544 std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_type_; 545 std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_name_; 546 #endif 547 std::mt19937 random_engine_; 548 }; 549 #ifdef SUPPORT_NNIE 550 int SvpSysInit(); 551 int SvpSysExit(); 552 #endif 553 554 } // namespace mindspore::lite 555 #endif // MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_BASE_H_ 556