• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINNIE_BENCHMARK_BENCHMARK_BASE_H_
18 #define MINNIE_BENCHMARK_BENCHMARK_BASE_H_
19 
#include <signal.h>
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#ifndef BENCHMARK_CLIP_JSON
#include <nlohmann/json.hpp>
#endif
#include "include/model.h"
#include "include/api/format.h"
#include "tools/common/flag_parser.h"
#include "src/common/file_utils.h"
#include "src/common/utils.h"
#include "ir/dtype/type_id.h"
#include "schema/model_generated.h"
42 
43 namespace mindspore::lite {
// Logs `str` both through MS_LOG at ERROR level and to stderr.
// Wrapped in do { ... } while (0) so the macro expands to exactly one
// statement. Fix: the original ended with `while (0);` — the trailing
// semicolon defeats the idiom, making `if (c) BENCHMARK_LOG_ERROR(s); else ...`
// ill-formed (double semicolon). Callers supply their own semicolon.
#define BENCHMARK_LOG_ERROR(str)   \
  do {                             \
    MS_LOG(ERROR) << str;          \
    std::cerr << str << std::endl; \
  } while (0)
49 
// Format of the user-supplied input data file: decoded image vs. raw binary dump.
enum MS_API InDataType { kImage = 0, kBinary = 1 };

// Frequency levels for model execution (naming follows HiAI's
// AiModelDescription; presumably higher values request higher NPU clock
// rates — confirm against the NPU adapter code).
enum MS_API AiModelDescription_Frequency {
  AiModelDescription_Frequency_LOW = 1,
  AiModelDescription_Frequency_MEDIUM = 2,
  AiModelDescription_Frequency_HIGH = 3,
  AiModelDescription_Frequency_EXTREME = 4
};

// Selects which tensors are dumped when tensor dumping is enabled.
enum MS_API DumpMode { DUMP_MODE_ALL = 0, DUMP_MODE_INPUT = 1, DUMP_MODE_OUTPUT = 2 };
60 
// Tolerances used by CompareData below: an element passes when
// |out - calib| <= absoluteTolerance + relativeTolerance * |calib|.
constexpr float relativeTolerance = 1e-5;
constexpr float absoluteTolerance = 1e-8;

// Divisor used in timing statistics (presumably microseconds -> milliseconds;
// confirm at the usage sites in the .cc).
constexpr float kFloatMSEC = 1000.0f;

// Minimum row count for printed result tables (assumed from the name; confirm
// in PrintResult's implementation).
constexpr int kNumPrintMin = 5;
// Delimiters used when parsing composite flag values (e.g. inputShapes uses
// ':' between tensors and ',' between dims).
constexpr const char *DELIM_COLON = ":";
constexpr const char *DELIM_COMMA = ",";
constexpr const char *DELIM_SLASH = "/";

// Lookup tables (defined in the corresponding .cc) mapping TypeId values and
// tensor formats to human-readable names for logging.
extern const std::unordered_map<int, std::string> kTypeIdMap;
extern const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap;
73 
// Keys for the dump configuration: kConfigPath is the environment variable
// naming the JSON config file; the remaining constants are JSON keys inside
// its "common_dump_settings" section (read by InitDumpConfigFromJson).
namespace dump {
constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG";
constexpr auto kSettings = "common_dump_settings";
constexpr auto kMode = "dump_mode";
constexpr auto kPath = "path";
constexpr auto kNetName = "net_name";
constexpr auto kInputOutput = "input_output";
constexpr auto kKernels = "kernels";
}  // namespace dump
84 
#ifdef ENABLE_ARM64
// Buffer layout for reading a two-counter perf event group on ARM64
// (looks like the Linux perf_event read format with
// PERF_FORMAT_GROUP | PERF_FORMAT_ID — confirm against the
// perf_event_open setup in the .cc).
struct PerfResult {
  int64_t nr;  // number of counters in the group
  struct {
    int64_t value;  // raw counter value
    int64_t id;     // kernel-assigned counter id
  } values[2];
};
// Accumulated values of the two perf counters (e.g. per op type/name).
struct PerfCount {
  int64_t value[2];
};
#endif
97 
98 struct MS_API CheckTensor {
99   CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data,
100               const std::vector<std::string> &strings_data = {""}) {
101     this->shape = shape;
102     this->data = data;
103     this->strings_data = strings_data;
104   }
105   std::vector<size_t> shape;
106   std::vector<float> data;
107   std::vector<std::string> strings_data;
108 };
109 
// Command-line flag container for the benchmark tool. The constructor
// registers every flag with FlagParser; after parsing, the public fields
// below hold the resulting values (defaults shown in the initializers).
class MS_API BenchmarkFlags : public virtual FlagParser {
 public:
  BenchmarkFlags() {
    // common
    AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", "");
    AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
    AddFlag(&BenchmarkFlags::config_file_, "configFile", "Config file", "");
    AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend310 | NNRT", "CPU");
    AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
    // MarkPerformance
    AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10);
    AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2);
    AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false);
    AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false);
    AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3);
    AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false);
    AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling",
            "Perf event profiling(only instructions statics enabled currently)", false);
    AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE");
    // MarkAccuracy
    AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", "");
    AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType",
            "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT");
    AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5);
    AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes",
            "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", "");
  }

  ~BenchmarkFlags() override = default;

  // Presumably splits in_data_file_ into input_data_list_ (multiple input
  // files) — confirm against the .cc implementation.
  void InitInputDataList();

  // Presumably parses resize_dims_in_ ("1,32,32,32:1,1,32,32,1") into
  // resize_dims_ — confirm against the .cc implementation.
  void InitResizeDimsList();

 public:
  // common
  std::string model_file_;
  std::string in_data_file_;
  std::string config_file_;
  std::vector<std::string> input_data_list_;
  InDataType in_data_type_ = kBinary;
  std::string in_data_type_in_ = "bin";
  int cpu_bind_mode_ = 1;
  // MarkPerformance
  int loop_count_ = 10;
  int num_threads_ = 2;
  bool enable_fp16_ = false;
  bool enable_parallel_ = false;
  int warm_up_loop_count_ = 3;
  // MarkAccuracy
  std::string benchmark_data_file_;
  std::string benchmark_data_type_ = "FLOAT";
  float accuracy_threshold_ = 0.5;
  // Resize
  std::string resize_dims_in_;
  std::vector<std::vector<int>> resize_dims_;

  std::string device_ = "CPU";
  bool time_profiling_ = false;
  bool perf_profiling_ = false;
  std::string perf_event_ = "CYCLE";
  // Not registered as flags above; presumably set programmatically
  // (e.g. from the dump config) — confirm in the .cc.
  bool dump_tensor_data_ = false;
  bool print_tensor_data_ = false;
};
175 
176 class MS_API BenchmarkBase {
177  public:
BenchmarkBase(BenchmarkFlags * flags)178   explicit BenchmarkBase(BenchmarkFlags *flags) : flags_(flags) {}
179 
180   virtual ~BenchmarkBase();
181 
182   int Init();
183   virtual int RunBenchmark() = 0;
184 
185  protected:
186   int LoadInput();
187 
188   virtual int GenerateInputData() = 0;
189 
190   int GenerateRandomData(size_t size, void *data, int data_type);
191 
192   virtual int ReadInputFile() = 0;
193 
194   int ReadCalibData();
195 
196   int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector<size_t> &dims);
197 
198   virtual int GetDataTypeByTensorName(const std::string &tensor_name) = 0;
199 
200   virtual int CompareOutput() = 0;
201 
202   int CompareStringData(const std::string &name, const std::vector<std::string> &calib_strings,
203                         const std::vector<std::string> &output_strings);
204 
205   int InitDumpConfigFromJson(char *path);
206 
207   int InitCallbackParameter();
208 
209   virtual int InitTimeProfilingCallbackParameter() = 0;
210 
211   virtual int InitPerfProfilingCallbackParameter() = 0;
212 
213   virtual int InitDumpTensorDataCallbackParameter() = 0;
214 
215   virtual int InitPrintTensorDataCallbackParameter() = 0;
216 
217   int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);
218 
219 #ifdef ENABLE_ARM64
220   int PrintPerfResult(const std::vector<std::string> &title,
221                       const std::map<std::string, std::pair<int, struct PerfCount>> &result);
222 #endif
223 
224   // tensorData need to be converter first
225   template <typename T, typename ST>
CompareData(const std::string & nodeName,const std::vector<ST> & msShape,const void * tensor_data)226   float CompareData(const std::string &nodeName, const std::vector<ST> &msShape, const void *tensor_data) {
227     const T *msTensorData = static_cast<const T *>(tensor_data);
228     auto iter = this->benchmark_data_.find(nodeName);
229     if (iter != this->benchmark_data_.end()) {
230       std::vector<size_t> castedMSShape;
231       size_t shapeSize = 1;
232       for (ST dim : msShape) {
233         castedMSShape.push_back(dim);
234         shapeSize *= dim;
235       }
236 
237       CheckTensor *calibTensor = iter->second;
238       if (calibTensor->shape != castedMSShape) {
239         std::ostringstream oss;
240         oss << "Shape of mslite output(";
241         for (auto dim : castedMSShape) {
242           oss << dim << ",";
243         }
244         oss << ") and shape source model output(";
245         for (auto dim : calibTensor->shape) {
246           oss << dim << ",";
247         }
248         oss << ") are different";
249         std::cerr << oss.str() << std::endl;
250         MS_LOG(ERROR) << oss.str().c_str();
251         return RET_ERROR;
252       }
253       size_t errorCount = 0;
254       float meanError = 0;
255       std::cout << "Data of node " << nodeName << " : ";
256       for (size_t j = 0; j < shapeSize; j++) {
257         if (j < 50) {
258           std::cout << static_cast<float>(msTensorData[j]) << " ";
259         }
260 
261         if (std::is_same<T, float>::value && (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j]))) {
262           std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl;
263           MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail";
264           return RET_ERROR;
265         }
266 
267         auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j));
268         auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j));
269         if (absoluteError > tolerance) {
270           if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) {
271             if (absoluteError > 1e-5) {
272               meanError += absoluteError;
273               errorCount++;
274             } else {
275               continue;
276             }
277           } else {
278             // just assume that atol = rtol
279             meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN);
280             errorCount++;
281           }
282         }
283       }
284       std::cout << std::endl;
285       if (meanError > 0.0f) {
286         meanError /= errorCount;
287       }
288 
289       if (meanError <= 0.0000001) {
290         std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl;
291       } else {
292         std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl;
293       }
294       return meanError;
295     } else {
296       MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str();
297       return RET_ERROR;
298     }
299   }
300 
301   template <typename T, typename Distribution>
FillInputData(size_t size,void * data,Distribution distribution)302   void FillInputData(size_t size, void *data, Distribution distribution) {
303     MS_ASSERT(data != nullptr);
304     size_t elements_num = size / sizeof(T);
305     (void)std::generate_n(static_cast<T *>(data), elements_num,
306                           [&]() { return static_cast<T>(distribution(random_engine_)); });
307   }
308 
309   int CheckThreadNumValid();
310 
311   int CheckDeviceTypeValid();
312 
313  protected:
314   BenchmarkFlags *flags_;
315   std::vector<std::string> benchmark_tensor_names_;
316   std::unordered_map<std::string, CheckTensor *> benchmark_data_;
317   std::unordered_map<std::string, int> data_type_map_{
318     {"FLOAT", kNumberTypeFloat}, {"INT8", kNumberTypeInt8}, {"INT32", kNumberTypeInt32}, {"UINT8", kNumberTypeUInt8}};
319   int msCalibDataType = kNumberTypeFloat;
320 
321   // callback parameters
322   uint64_t op_begin_ = 0;
323   int op_call_times_total_ = 0;
324   float op_cost_total_ = 0.0f;
325   std::map<std::string, std::pair<int, float>> op_times_by_type_;
326   std::map<std::string, std::pair<int, float>> op_times_by_name_;
327 #ifndef BENCHMARK_CLIP_JSON
328   // dump data
329   nlohmann::json dump_cfg_json_;
330 #endif
331   std::string dump_file_output_dir_;
332 #ifdef ENABLE_ARM64
333   int perf_fd = 0;
334   int perf_fd2 = 0;
335   float op_cost2_total_ = 0.0f;
336   std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_type_;
337   std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_name_;
338 #endif
339   std::mt19937 random_engine_;
340 };
341 
342 }  // namespace mindspore::lite
343 #endif  // MINNIE_BENCHMARK_BENCHMARK_BASE_H_
344