1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_BASE_H_ 18 #define MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_BASE_H_ 19 20 #include <signal.h> 21 #include <random> 22 #include <unordered_map> 23 #include <fstream> 24 #include <iostream> 25 #include <map> 26 #include <cmath> 27 #include <string> 28 #include <vector> 29 #include <memory> 30 #include <cfloat> 31 #include <utility> 32 #include <mutex> 33 #ifndef BENCHMARK_CLIP_JSON 34 #include <nlohmann/json.hpp> 35 #endif 36 #include "include/model.h" 37 #include "include/api/types.h" 38 #include "include/api/format.h" 39 #include "tools/common/flag_parser.h" 40 #include "src/common/file_utils.h" 41 #include "src/common/utils.h" 42 #include "ir/dtype/type_id.h" 43 #include "schema/model_generated.h" 44 #include "nnacl/op_base.h" 45 46 namespace mindspore::lite { 47 #define BENCHMARK_LOG_ERROR(str) \ 48 do { \ 49 MS_LOG(ERROR) << str; \ 50 std::cerr << str << std::endl; \ 51 } while (0); 52 53 enum MS_API InDataType { kImage = 0, kBinary = 1 }; 54 55 enum MS_API AiModelDescription_Frequency { 56 AiModelDescription_Frequency_LOW = 1, 57 AiModelDescription_Frequency_MEDIUM = 2, 58 AiModelDescription_Frequency_HIGH = 3, 59 AiModelDescription_Frequency_EXTREME = 4 60 }; 61 62 enum MS_API DumpMode { DUMP_MODE_ALL = 0, DUMP_MODE_INPUT = 1, DUMP_MODE_OUTPUT = 2 }; 63 64 constexpr float kRelativeTolerance = 1e-5; 65 constexpr float kAbsoluteTolerance = 1e-8; 66 constexpr int CosineErrMaxVal = 2; 67 constexpr float kFloatMSEC = 1000.0f; 68 69 constexpr int kNumPrintMin = 5; 70 constexpr const char *DELIM_COLON = ":"; 71 constexpr const char *DELIM_COMMA = ","; 72 constexpr const char *DELIM_SLASH = "/"; 73 constexpr size_t kEncMaxLen = 16; 74 75 extern const std::unordered_map<int, std::string> kTypeIdMap; 76 extern const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap; 77 78 const std::unordered_map<std::string, mindspore::ModelType> ModelTypeMap{ 79 {"MindIR_Lite", mindspore::ModelType::kMindIR_Lite}, {"MindIR", mindspore::ModelType::kMindIR}}; 80 81 namespace dump { 82 constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; 83 constexpr auto kSettings = "common_dump_settings"; 84 constexpr auto kMode = "dump_mode"; 85 constexpr auto kPath = "path"; 86 constexpr auto kNetName = "net_name"; 87 constexpr auto kInputOutput = "input_output"; 88 constexpr auto kKernels = "kernels"; 89 } // namespace dump 90 91 #ifdef ENABLE_ARM64 92 struct PerfResult { 93 int64_t nr; 94 struct { 95 int64_t value; 96 int64_t id; 97 } values[2]; 98 }; 99 struct PerfCount { 100 int64_t value[2]; 101 }; 102 #endif 103 104 struct MS_API CheckTensor { 105 CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data, 106 const std::vector<std::string> &strings_data = {""}) { 107 this->shape = shape; 108 this->data = data; 109 this->strings_data = strings_data; 110 } 111 std::vector<size_t> shape; 112 std::vector<float> data; 113 std::vector<std::string> strings_data; 114 }; 115 116 class MS_API BenchmarkFlags : public virtual FlagParser { 117 public: BenchmarkFlags()118 BenchmarkFlags() { 119 // common 120 AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", ""); 121 AddFlag(&BenchmarkFlags::model_type_, "modelType", "Input model type. MindIR | MindIR_Lite", "MindIR"); 122 AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); 123 AddFlag(&BenchmarkFlags::group_info_file_, "GroupInfoFile", "Communication group info file", ""); 124 AddFlag(&BenchmarkFlags::config_file_, "configFile", "Config file", ""); 125 AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend | NNRT | Auto", "CPU"); 126 AddFlag(&BenchmarkFlags::provider_, "provider", "device provider litert | tensorrt | mindrt", "litert"); 127 AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU.", 1); 128 // MarkPerformance 129 AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10); 130 AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2); 131 AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false); 132 AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false); 133 AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3); 134 AddFlag(&BenchmarkFlags::nnrt_performance_mode_, "nnrtPerformanceMode", "NNRT Performance Mode: 0|1|2|3|4, " 135 "0 for NONE, 1 for LOW, 2 for MEDIUM, 3 for HIGH, 4 for EXTREME.", 3); 136 AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false); 137 AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling", 138 "Perf event profiling(only instructions statics enabled currently)", false); 139 AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE"); 140 // MarkAccuracy 141 AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", ""); 142 AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType", 143 "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT"); 144 AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); 145 AddFlag(&BenchmarkFlags::cosine_distance_threshold_, "cosineDistanceThreshold", "cosine distance threshold", -1.1); 146 AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes", 147 "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", ""); 148 AddFlag(&BenchmarkFlags::resize_dims_in_v2_, "inputShape", 149 "Shape of input data. Specify input names followed by their shapes. Wrap the whole string in " 150 "double-quotes(\"\"). e.g. " 151 "\"inTensor1:1,32,32,32;inTensor2:1,1,32,32,4\"", 152 ""); 153 #ifdef ENABLE_CLOUD_FUSION_INFERENCE 154 // Distributed Infer 155 AddFlag(&BenchmarkFlags::device_id_, "deviceId", "Set device id for distributed inference", -1); 156 AddFlag(&BenchmarkFlags::rank_id_, "rankId", "Set rank id for distributed inference", -1); 157 #endif 158 // Decrypt and Crypte 159 AddFlag(&BenchmarkFlags::decrypt_key_str_, "decryptKey", 160 "The key used to decrypt the file, expressed in hexadecimal characters. Only support AES-GCM and the key " 161 "length is 16.", 162 ""); 163 AddFlag(&BenchmarkFlags::crypto_lib_path_, "cryptoLibPath", "Pass the crypto library path.", ""); 164 AddFlag(&BenchmarkFlags::enable_parallel_predict_, "enableParallelPredict", "Enable model parallel : true | false", 165 false); 166 AddFlag(&BenchmarkFlags::parallel_num_, "parallelNum", "parallel num of parallel predict", 2); 167 AddFlag(&BenchmarkFlags::parallel_task_num_, "parallelTaskNum", 168 "parallel task num of parallel predict, unlimited number of tasks when the value is -1", 2); 169 AddFlag(&BenchmarkFlags::workers_num_, "workersNum", "works num of parallel predict", 2); 170 AddFlag(&BenchmarkFlags::core_list_str_, "cpuCoreList", "The core id of the bundled core, e.g. 0,1,2,3", ""); 171 AddFlag(&BenchmarkFlags::inter_op_parallel_num_, "interOpParallelNum", "parallel number of operators in predict", 172 1); 173 AddFlag(&BenchmarkFlags::enable_gl_texture_, "enableGLTexture", "Enable GlTexture2D", false); 174 AddFlag(&BenchmarkFlags::delegate_mode_, "delegateMode", "set the delegate mode: CoreML | NNAPI", ""); 175 AddFlag(&BenchmarkFlags::enable_shared_thread_pool_, "enableSharedThreadPool", "Enable shared thread pool", false); 176 AddFlag(&BenchmarkFlags::thread_num_limit_per_worker_, "threadNumLimitPerWorker", "thread num limit per worker ", 177 ""); 178 AddFlag(&BenchmarkFlags::thread_num_remaining_per_worker_, "threadNumRemainingPerWorker", 179 "thread num limit per worker ", ""); 180 } 181 182 ~BenchmarkFlags() override = default; 183 184 void InitInputDataList(); 185 186 int InitResizeDimsList(); 187 188 void InitCoreList(); 189 190 public: 191 // common 192 bool enable_parallel_predict_ = false; 193 int parallel_num_ = 2; 194 int parallel_task_num_ = 2; 195 int inter_op_parallel_num_ = 1; 196 int workers_num_ = 2; 197 std::string model_file_; 198 std::string in_data_file_; 199 std::string group_info_file_; 200 std::string config_file_; 201 std::string model_type_; 202 std::vector<std::string> input_data_list_; 203 InDataType in_data_type_ = kBinary; 204 std::string in_data_type_in_ = "bin"; 205 int cpu_bind_mode_ = 1; 206 std::string core_list_str_; 207 std::vector<int> core_list_; 208 // MarkPerformance 209 int loop_count_ = 10; 210 int num_threads_ = 2; 211 bool enable_fp16_ = false; 212 bool enable_gl_texture_ = false; 213 bool enable_parallel_ = false; 214 int warm_up_loop_count_ = 3; 215 int nnrt_performance_mode_ = 3; 216 // MarkAccuracy 217 std::string benchmark_data_file_; 218 std::string benchmark_data_type_ = "FLOAT"; 219 float accuracy_threshold_ = 0.5; 220 float cosine_distance_threshold_ = -1.1; 221 // Resize 222 std::string resize_dims_in_; 223 std::string resize_dims_in_v2_; 224 std::map<std::string, std::vector<int64_t>> graph_input_shape_map_; 225 std::vector<std::vector<int>> resize_dims_; 226 // Distributed Infer 227 int device_id_; 228 int rank_id_; 229 230 std::string device_ = "CPU"; 231 std::string provider_ = "litert"; 232 bool time_profiling_ = false; 233 bool perf_profiling_ = false; 234 std::string perf_event_ = "CYCLE"; 235 bool dump_tensor_data_ = false; 236 bool print_tensor_data_ = false; 237 std::string decrypt_key_str_; 238 std::string dec_mode_ = "AES-GCM"; 239 std::string crypto_lib_path_; 240 std::string delegate_mode_; 241 bool enable_shared_thread_pool_ = false; 242 std::string thread_num_limit_per_worker_; 243 std::string thread_num_remaining_per_worker_; 244 }; 245 246 class MS_API BenchmarkBase { 247 public: BenchmarkBase(BenchmarkFlags * flags)248 explicit BenchmarkBase(BenchmarkFlags *flags) : flags_(flags) {} 249 250 virtual ~BenchmarkBase(); 251 252 int Init(); 253 virtual int RunBenchmark() = 0; 254 255 protected: 256 virtual int LoadInput() = 0; 257 258 virtual int GenerateInputData() = 0; 259 260 int GenerateRandomData(size_t size, void *data, int data_type); 261 262 virtual int ReadInputFile() = 0; 263 264 int ReadCalibData(); 265 266 int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector<size_t> &dims); 267 268 virtual int GetDataTypeByTensorName(const std::string &tensor_name) = 0; 269 270 virtual int CompareOutput() = 0; 271 272 int CompareStringData(const std::string &name, const std::vector<std::string> &calib_strings, 273 const std::vector<std::string> &output_strings); 274 275 int InitDumpConfigFromJson(const char *path); 276 277 int InitCallbackParameter(); 278 279 virtual int InitTimeProfilingCallbackParameter() = 0; 280 281 virtual int InitPerfProfilingCallbackParameter() = 0; 282 283 virtual int InitDumpTensorDataCallbackParameter() = 0; 284 285 virtual int InitPrintTensorDataCallbackParameter() = 0; 286 287 int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result); 288 289 #ifdef ENABLE_ARM64 290 int PrintPerfResult(const std::vector<std::string> &title, 291 const std::map<std::string, std::pair<int, struct PerfCount>> &result); 292 #endif 293 294 // tensorData need to be converter first 295 template <typename T, typename ST> 296 float CompareData(const std::string &nodeName, const std::vector<ST> &msShape, const void *tensor_data, 297 float relativeTolerance = kRelativeTolerance, float absoluteTolerance = kAbsoluteTolerance) { 298 const T *msTensorData = static_cast<const T *>(tensor_data); 299 auto iter = this->benchmark_data_.find(nodeName); 300 if (iter != this->benchmark_data_.end()) { 301 std::vector<size_t> castedMSShape; 302 size_t shapeSize = 1; 303 for (ST dim : msShape) { 304 if (dim <= 0) { 305 MS_LOG(ERROR) << "Invalid shape."; 306 return RET_ERROR; 307 } 308 castedMSShape.push_back(dim); 309 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(shapeSize, static_cast<size_t>(dim)), RET_ERROR, "mul overflow"); 310 shapeSize *= static_cast<size_t>(dim); 311 } 312 313 CheckTensor *calibTensor = iter->second; 314 if (calibTensor == nullptr) { 315 MS_LOG(ERROR) << "calibTensor is nullptr."; 316 return RET_ERROR; 317 } 318 if (!CheckShapeValid(calibTensor->shape, castedMSShape)) { 319 std::ostringstream oss; 320 oss << "Shape of mslite output("; 321 for (auto dim : castedMSShape) { 322 oss << dim << ","; 323 } 324 oss << ") and shape source model output("; 325 for (auto dim : calibTensor->shape) { 326 oss << dim << ","; 327 } 328 oss << ") are different"; 329 std::cerr << oss.str() << std::endl; 330 MS_LOG(ERROR) << oss.str().c_str(); 331 return RET_ERROR; 332 } 333 if (calibTensor->data.size() != shapeSize) { 334 MS_LOG(ERROR) << "calibTensor data size " << calibTensor->data.size() << " is not equal to " << shapeSize; 335 return RET_ERROR; 336 } 337 size_t errorCount = 0; 338 float meanError = 0; 339 std::cout << "Data of node " << nodeName << " : "; 340 for (size_t j = 0; j < shapeSize; j++) { 341 if (j < 50) { 342 std::cout << static_cast<float>(msTensorData[j]) << " "; 343 } 344 345 if (std::is_same<T, float>::value && (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j]))) { 346 std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; 347 MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; 348 return RET_ERROR; 349 } 350 351 auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j)); 352 auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j)); 353 if (absoluteError > tolerance) { 354 if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) { 355 if (absoluteError > 1e-5) { 356 meanError += absoluteError; 357 errorCount++; 358 } else { 359 continue; 360 } 361 } else { 362 // just assume that atol = rtol 363 meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN); 364 errorCount++; 365 } 366 } 367 } 368 std::cout << std::endl; 369 if (meanError > 0.0f) { 370 meanError /= errorCount; 371 } 372 373 if (meanError <= 0.0000001) { 374 std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl; 375 } else { 376 std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl; 377 } 378 return meanError; 379 } else { 380 MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str(); 381 return RET_ERROR; 382 } 383 } 384 GetMeanError(double sum_a,double sum_b,double dot_sum,float * mean_error)385 void GetMeanError(double sum_a, double sum_b, double dot_sum, float *mean_error) { 386 if (fabs(sum_a) < DBL_EPSILON && fabs(sum_b) < FLT_EPSILON) { 387 *mean_error = 1; 388 } else if (fabs(sum_a * sum_b) < DBL_EPSILON) { 389 if (fabs(sum_a) < FLT_EPSILON || fabs(sum_b) < FLT_EPSILON) { 390 *mean_error = 1; 391 } else { 392 *mean_error = 0; 393 } 394 } else { 395 *mean_error = dot_sum / (sqrt(sum_a) * sqrt(sum_b)); 396 } 397 } 398 399 // tensorData need to be converter first 400 template <typename T, typename ST> CompareDatabyCosineDistance(const std::string & nodeName,const std::vector<ST> & msShape,const void * tensor_data,float * mean_error)401 int CompareDatabyCosineDistance(const std::string &nodeName, const std::vector<ST> &msShape, const void *tensor_data, 402 float *mean_error) { 403 if (mean_error == nullptr) { 404 MS_LOG(ERROR) << "mean_error is nullptr"; 405 return RET_ERROR; 406 } 407 if (tensor_data == nullptr) { 408 MS_LOG(ERROR) << "tensor_data is nullptr"; 409 return RET_ERROR; 410 } 411 const T *msTensorData = static_cast<const T *>(tensor_data); 412 auto iter = this->benchmark_data_.find(nodeName); 413 if (iter != this->benchmark_data_.end()) { 414 std::vector<size_t> castedMSShape; 415 size_t shapeSize = 1; 416 for (int64_t dim : msShape) { 417 if (dim <= 0) { 418 MS_LOG(ERROR) << "Invalid shape."; 419 return RET_ERROR; 420 } 421 castedMSShape.push_back(size_t(dim)); 422 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(shapeSize, static_cast<size_t>(dim)), RET_ERROR, "mul overflow"); 423 shapeSize *= static_cast<size_t>(dim); 424 } 425 426 CheckTensor *calibTensor = iter->second; 427 if (calibTensor == nullptr) { 428 MS_LOG(ERROR) << "calibTensor is nullptr."; 429 return RET_ERROR; 430 } 431 if (!CheckShapeValid(calibTensor->shape, castedMSShape)) { 432 std::ostringstream oss; 433 oss << "Shape of mslite output("; 434 for (auto dim : castedMSShape) { 435 oss << dim << ","; 436 } 437 oss << ") and shape source model output("; 438 for (auto dim : calibTensor->shape) { 439 oss << dim << ","; 440 } 441 oss << ") are different"; 442 std::cerr << oss.str() << std::endl; 443 MS_LOG(ERROR) << oss.str().c_str(); 444 return RET_ERROR; 445 } 446 if (calibTensor->data.size() != shapeSize) { 447 MS_LOG(ERROR) << "calibTensor data size " << calibTensor->data.size() << " is not equal to " << shapeSize; 448 return RET_ERROR; 449 } 450 451 double dot_sum = 0; 452 double sum_a = 0; 453 double sum_b = 0; 454 std::cout << "Data of node " << nodeName << " : "; 455 for (size_t j = 0; j < shapeSize; j++) { 456 if (j < 50) { 457 std::cout << static_cast<float>(msTensorData[j]) << " "; 458 } 459 460 if (std::is_same<T, float>::value && (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j]))) { 461 std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; 462 MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; 463 return RET_ERROR; 464 } 465 dot_sum += static_cast<double>(msTensorData[j]) * calibTensor->data.at(j); 466 sum_a += static_cast<double>(msTensorData[j]) * msTensorData[j]; 467 sum_b += static_cast<double>(calibTensor->data.at(j)) * calibTensor->data.at(j); 468 } 469 GetMeanError(sum_a, sum_b, dot_sum, mean_error); 470 std::cout << std::endl; 471 std::cout << "Mean cosine distance of node/tensor " << nodeName << " : " << (*mean_error) * 100 << "%" 472 << std::endl; 473 return RET_OK; 474 } else { 475 MS_LOG(ERROR) << "%s is not in Source Model output", nodeName.c_str(); 476 return RET_ERROR; 477 } 478 } 479 480 template <typename T, typename Distribution> FillInputData(size_t size,void * data,Distribution distribution)481 void FillInputData(size_t size, void *data, Distribution distribution) { 482 MS_ASSERT(data != nullptr); 483 size_t elements_num = size / sizeof(T); 484 (void)std::generate_n(static_cast<T *>(data), elements_num, 485 [&, this]() { return static_cast<T>(distribution(random_engine_)); }); 486 } 487 CheckShapeValid(const std::vector<size_t> & calib_output_shape,const std::vector<size_t> & real_output_shape)488 bool CheckShapeValid(const std::vector<size_t> &calib_output_shape, const std::vector<size_t> &real_output_shape) { 489 if (calib_output_shape == real_output_shape) { 490 return true; 491 } 492 // (1, 225) compare with (1, 225, 1, 1) return true 493 size_t min_size = 494 calib_output_shape.size() > real_output_shape.size() ? real_output_shape.size() : calib_output_shape.size(); 495 size_t i = 0; 496 for (i = 0; i < min_size; ++i) { 497 if (calib_output_shape[i] != real_output_shape[i]) { 498 return false; 499 } 500 } 501 while (i < calib_output_shape.size()) { 502 if (calib_output_shape[i] != 1) { 503 return false; 504 } 505 i++; 506 } 507 while (i < real_output_shape.size()) { 508 if (real_output_shape[i] != 1) { 509 return false; 510 } 511 i++; 512 } 513 return true; 514 } 515 516 int CheckThreadNumValid(); 517 518 int CheckModelValid(); 519 520 int CheckDeviceTypeValid(); 521 522 protected: 523 BenchmarkFlags *flags_; 524 std::vector<std::string> benchmark_tensor_names_; 525 std::unordered_map<std::string, CheckTensor *> benchmark_data_; 526 std::unordered_map<std::string, int> data_type_map_{ 527 {"FLOAT", kNumberTypeFloat}, {"INT8", kNumberTypeInt8}, {"INT32", kNumberTypeInt32}, {"UINT8", kNumberTypeUInt8}}; 528 int msCalibDataType = kNumberTypeFloat; 529 530 // callback parameters 531 uint64_t op_begin_ = 0; 532 int op_call_times_total_ = 0; 533 float op_cost_total_ = 0.0f; 534 std::map<std::string, std::pair<int, float>> op_times_by_type_; 535 std::map<std::string, std::pair<int, float>> op_times_by_name_; 536 std::map<std::string, float> op_start_times_by_name_; 537 std::mutex op_times_mutex_; 538 #ifndef BENCHMARK_CLIP_JSON 539 // dump data 540 nlohmann::json dump_cfg_json_; 541 #endif 542 std::string dump_file_output_dir_; 543 #ifdef ENABLE_ARM64 544 int perf_fd = 0; 545 int perf_fd2 = 0; 546 float op_cost2_total_ = 0.0f; 547 std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_type_; 548 std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_name_; 549 #endif 550 std::mt19937 random_engine_; 551 }; 552 #ifdef SUPPORT_NNIE 553 int SvpSysInit(); 554 int SvpSysExit(); 555 #endif 556 557 } // namespace mindspore::lite 558 #endif // MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_BASE_H_ 559