1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINNIE_BENCHMARK_BENCHMARK_BASE_H_ 18 #define MINNIE_BENCHMARK_BENCHMARK_BASE_H_ 19 20 #include <signal.h> 21 #include <random> 22 #include <unordered_map> 23 #include <fstream> 24 #include <iostream> 25 #include <map> 26 #include <cmath> 27 #include <string> 28 #include <vector> 29 #include <memory> 30 #include <cfloat> 31 #include <utility> 32 #ifndef BENCHMARK_CLIP_JSON 33 #include <nlohmann/json.hpp> 34 #endif 35 #include "include/model.h" 36 #include "include/api/format.h" 37 #include "tools/common/flag_parser.h" 38 #include "src/common/file_utils.h" 39 #include "src/common/utils.h" 40 #include "ir/dtype/type_id.h" 41 #include "schema/model_generated.h" 42 43 namespace mindspore::lite { 44 #define BENCHMARK_LOG_ERROR(str) \ 45 do { \ 46 MS_LOG(ERROR) << str; \ 47 std::cerr << str << std::endl; \ 48 } while (0); 49 50 enum MS_API InDataType { kImage = 0, kBinary = 1 }; 51 52 enum MS_API AiModelDescription_Frequency { 53 AiModelDescription_Frequency_LOW = 1, 54 AiModelDescription_Frequency_MEDIUM = 2, 55 AiModelDescription_Frequency_HIGH = 3, 56 AiModelDescription_Frequency_EXTREME = 4 57 }; 58 59 enum MS_API DumpMode { DUMP_MODE_ALL = 0, DUMP_MODE_INPUT = 1, DUMP_MODE_OUTPUT = 2 }; 60 61 constexpr float relativeTolerance = 1e-5; 62 constexpr float absoluteTolerance = 1e-8; 63 64 constexpr float kFloatMSEC = 1000.0f; 65 66 constexpr int kNumPrintMin = 5; 67 constexpr const char *DELIM_COLON = ":"; 68 constexpr const char *DELIM_COMMA = ","; 69 constexpr const char *DELIM_SLASH = "/"; 70 71 extern const std::unordered_map<int, std::string> kTypeIdMap; 72 extern const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap; 73 74 // 75 namespace dump { 76 constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; 77 constexpr auto kSettings = "common_dump_settings"; 78 constexpr auto kMode = "dump_mode"; 79 constexpr auto kPath = "path"; 80 constexpr auto kNetName = "net_name"; 81 constexpr auto kInputOutput = "input_output"; 82 constexpr auto kKernels = "kernels"; 83 } // namespace dump 84 85 #ifdef ENABLE_ARM64 86 struct PerfResult { 87 int64_t nr; 88 struct { 89 int64_t value; 90 int64_t id; 91 } values[2]; 92 }; 93 struct PerfCount { 94 int64_t value[2]; 95 }; 96 #endif 97 98 struct MS_API CheckTensor { 99 CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data, 100 const std::vector<std::string> &strings_data = {""}) { 101 this->shape = shape; 102 this->data = data; 103 this->strings_data = strings_data; 104 } 105 std::vector<size_t> shape; 106 std::vector<float> data; 107 std::vector<std::string> strings_data; 108 }; 109 110 class MS_API BenchmarkFlags : public virtual FlagParser { 111 public: BenchmarkFlags()112 BenchmarkFlags() { 113 // common 114 AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", ""); 115 AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); 116 AddFlag(&BenchmarkFlags::config_file_, "configFile", "Config file", ""); 117 AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend310 | NNRT", "CPU"); 118 AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", 119 "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1); 120 // MarkPerformance 121 AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10); 122 AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2); 123 AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false); 124 AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false); 125 AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3); 126 AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false); 127 AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling", 128 "Perf event profiling(only instructions statics enabled currently)", false); 129 AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE"); 130 // MarkAccuracy 131 AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", ""); 132 AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType", 133 "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT"); 134 AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); 135 AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes", 136 "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", ""); 137 } 138 139 ~BenchmarkFlags() override = default; 140 141 void InitInputDataList(); 142 143 void InitResizeDimsList(); 144 145 public: 146 // common 147 std::string model_file_; 148 std::string in_data_file_; 149 std::string config_file_; 150 std::vector<std::string> input_data_list_; 151 InDataType in_data_type_ = kBinary; 152 std::string in_data_type_in_ = "bin"; 153 int cpu_bind_mode_ = 1; 154 // MarkPerformance 155 int loop_count_ = 10; 156 int num_threads_ = 2; 157 bool enable_fp16_ = false; 158 bool enable_parallel_ = false; 159 int warm_up_loop_count_ = 3; 160 // MarkAccuracy 161 std::string benchmark_data_file_; 162 std::string benchmark_data_type_ = "FLOAT"; 163 float accuracy_threshold_ = 0.5; 164 // Resize 165 std::string resize_dims_in_; 166 std::vector<std::vector<int>> resize_dims_; 167 168 std::string device_ = "CPU"; 169 bool time_profiling_ = false; 170 bool perf_profiling_ = false; 171 std::string perf_event_ = "CYCLE"; 172 bool dump_tensor_data_ = false; 173 bool print_tensor_data_ = false; 174 }; 175 176 class MS_API BenchmarkBase { 177 public: BenchmarkBase(BenchmarkFlags * flags)178 explicit BenchmarkBase(BenchmarkFlags *flags) : flags_(flags) {} 179 180 virtual ~BenchmarkBase(); 181 182 int Init(); 183 virtual int RunBenchmark() = 0; 184 185 protected: 186 int LoadInput(); 187 188 virtual int GenerateInputData() = 0; 189 190 int GenerateRandomData(size_t size, void *data, int data_type); 191 192 virtual int ReadInputFile() = 0; 193 194 int ReadCalibData(); 195 196 int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector<size_t> &dims); 197 198 virtual int GetDataTypeByTensorName(const std::string &tensor_name) = 0; 199 200 virtual int CompareOutput() = 0; 201 202 int CompareStringData(const std::string &name, const std::vector<std::string> &calib_strings, 203 const std::vector<std::string> &output_strings); 204 205 int InitDumpConfigFromJson(char *path); 206 207 int InitCallbackParameter(); 208 209 virtual int InitTimeProfilingCallbackParameter() = 0; 210 211 virtual int InitPerfProfilingCallbackParameter() = 0; 212 213 virtual int InitDumpTensorDataCallbackParameter() = 0; 214 215 virtual int InitPrintTensorDataCallbackParameter() = 0; 216 217 int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result); 218 219 #ifdef ENABLE_ARM64 220 int PrintPerfResult(const std::vector<std::string> &title, 221 const std::map<std::string, std::pair<int, struct PerfCount>> &result); 222 #endif 223 224 // tensorData need to be converter first 225 template <typename T, typename ST> CompareData(const std::string & nodeName,const std::vector<ST> & msShape,const void * tensor_data)226 float CompareData(const std::string &nodeName, const std::vector<ST> &msShape, const void *tensor_data) { 227 const T *msTensorData = static_cast<const T *>(tensor_data); 228 auto iter = this->benchmark_data_.find(nodeName); 229 if (iter != this->benchmark_data_.end()) { 230 std::vector<size_t> castedMSShape; 231 size_t shapeSize = 1; 232 for (ST dim : msShape) { 233 castedMSShape.push_back(dim); 234 shapeSize *= dim; 235 } 236 237 CheckTensor *calibTensor = iter->second; 238 if (calibTensor->shape != castedMSShape) { 239 std::ostringstream oss; 240 oss << "Shape of mslite output("; 241 for (auto dim : castedMSShape) { 242 oss << dim << ","; 243 } 244 oss << ") and shape source model output("; 245 for (auto dim : calibTensor->shape) { 246 oss << dim << ","; 247 } 248 oss << ") are different"; 249 std::cerr << oss.str() << std::endl; 250 MS_LOG(ERROR) << oss.str().c_str(); 251 return RET_ERROR; 252 } 253 size_t errorCount = 0; 254 float meanError = 0; 255 std::cout << "Data of node " << nodeName << " : "; 256 for (size_t j = 0; j < shapeSize; j++) { 257 if (j < 50) { 258 std::cout << static_cast<float>(msTensorData[j]) << " "; 259 } 260 261 if (std::is_same<T, float>::value && (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j]))) { 262 std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; 263 MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; 264 return RET_ERROR; 265 } 266 267 auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j)); 268 auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j)); 269 if (absoluteError > tolerance) { 270 if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) { 271 if (absoluteError > 1e-5) { 272 meanError += absoluteError; 273 errorCount++; 274 } else { 275 continue; 276 } 277 } else { 278 // just assume that atol = rtol 279 meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN); 280 errorCount++; 281 } 282 } 283 } 284 std::cout << std::endl; 285 if (meanError > 0.0f) { 286 meanError /= errorCount; 287 } 288 289 if (meanError <= 0.0000001) { 290 std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl; 291 } else { 292 std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl; 293 } 294 return meanError; 295 } else { 296 MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str(); 297 return RET_ERROR; 298 } 299 } 300 301 template <typename T, typename Distribution> FillInputData(size_t size,void * data,Distribution distribution)302 void FillInputData(size_t size, void *data, Distribution distribution) { 303 MS_ASSERT(data != nullptr); 304 size_t elements_num = size / sizeof(T); 305 (void)std::generate_n(static_cast<T *>(data), elements_num, 306 [&]() { return static_cast<T>(distribution(random_engine_)); }); 307 } 308 309 int CheckThreadNumValid(); 310 311 int CheckDeviceTypeValid(); 312 313 protected: 314 BenchmarkFlags *flags_; 315 std::vector<std::string> benchmark_tensor_names_; 316 std::unordered_map<std::string, CheckTensor *> benchmark_data_; 317 std::unordered_map<std::string, int> data_type_map_{ 318 {"FLOAT", kNumberTypeFloat}, {"INT8", kNumberTypeInt8}, {"INT32", kNumberTypeInt32}, {"UINT8", kNumberTypeUInt8}}; 319 int msCalibDataType = kNumberTypeFloat; 320 321 // callback parameters 322 uint64_t op_begin_ = 0; 323 int op_call_times_total_ = 0; 324 float op_cost_total_ = 0.0f; 325 std::map<std::string, std::pair<int, float>> op_times_by_type_; 326 std::map<std::string, std::pair<int, float>> op_times_by_name_; 327 #ifndef BENCHMARK_CLIP_JSON 328 // dump data 329 nlohmann::json dump_cfg_json_; 330 #endif 331 std::string dump_file_output_dir_; 332 #ifdef ENABLE_ARM64 333 int perf_fd = 0; 334 int perf_fd2 = 0; 335 float op_cost2_total_ = 0.0f; 336 std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_type_; 337 std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_name_; 338 #endif 339 std::mt19937 random_engine_; 340 }; 341 342 } // namespace mindspore::lite 343 #endif // MINNIE_BENCHMARK_BENCHMARK_BASE_H_ 344