• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "tools/benchmark/benchmark_unified_api.h"
#include <algorithm>
#include <functional>
#include <limits>
#include <utility>
21 #include "include/context.h"
22 #include "include/ms_tensor.h"
23 #include "include/version.h"
24 #include "schema/model_generated.h"
25 #include "src/common/common.h"
26 #include "src/tensor.h"
27 #ifdef ENABLE_ARM64
28 #include <linux/perf_event.h>
29 #include <sys/ioctl.h>
30 #include <asm/unistd.h>
31 #include <unistd.h>
32 #endif
33 
34 namespace mindspore {
// Upper bound on how many tensor elements DataToString renders per tensor.
constexpr size_t kDataToStringMaxNum = 40;
// Maximum number of input values printed per tensor by PrintInputData.
constexpr int kPrintDataNum = 20;
// Default frequency level handed to KirinNPUDeviceInfo::SetFrequency.
constexpr int kFrequencyDefault = 3;
// Factor converting a fractional bias into a percentage (used as a multiplier
// despite the "Divisor" name).
constexpr int kPercentageDivisor = 100;
#ifndef BENCHMARK_CLIP_JSON
// Dump-config values: 0 selects inputs-and-outputs, 2 selects outputs only.
constexpr int kDumpInputsAndOutputs = 0;
constexpr int kDumpOutputs = 2;
#endif
43 
44 namespace lite {
GenerateInputData()45 int BenchmarkUnifiedApi::GenerateInputData() {
46   for (auto &tensor : ms_inputs_for_api_) {
47     if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
48       MSTensor *input = MSTensor::StringsToTensor(tensor.Name(), {"you're the best."});
49       if (input == nullptr) {
50         std::cerr << "StringsToTensor failed" << std::endl;
51         MS_LOG(ERROR) << "StringsToTensor failed";
52         return RET_ERROR;
53       }
54       tensor = *input;
55     } else {
56       auto input_data = tensor.MutableData();
57       if (input_data == nullptr) {
58         MS_LOG(ERROR) << "MallocData for inTensor failed";
59         return RET_ERROR;
60       }
61       int status = GenerateRandomData(tensor.DataSize(), input_data, static_cast<int>(tensor.DataType()));
62       if (status != RET_OK) {
63         std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl;
64         MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status;
65         return status;
66       }
67     }
68   }
69   return RET_OK;
70 }
71 
ReadInputFile()72 int BenchmarkUnifiedApi::ReadInputFile() {
73   if (ms_inputs_for_api_.empty()) {
74     return RET_OK;
75   }
76 
77   if (this->flags_->in_data_type_ == kImage) {
78     MS_LOG(ERROR) << "Not supported image input";
79     return RET_ERROR;
80   } else {
81     for (size_t i = 0; i < flags_->input_data_list_.size(); i++) {
82       auto &cur_tensor = ms_inputs_for_api_.at(i);
83       MS_ASSERT(cur_tensor != nullptr);
84       size_t size;
85       char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
86       if (bin_buf == nullptr) {
87         MS_LOG(ERROR) << "ReadFile return nullptr";
88         return RET_ERROR;
89       }
90       if (static_cast<int>(cur_tensor.DataType()) == kObjectTypeString) {
91         std::string str(bin_buf, size);
92         MSTensor *input = MSTensor::StringsToTensor(cur_tensor.Name(), {str});
93         if (input == nullptr) {
94           std::cerr << "StringsToTensor failed" << std::endl;
95           MS_LOG(ERROR) << "StringsToTensor failed";
96           delete[] bin_buf;
97           return RET_ERROR;
98         }
99         cur_tensor = *input;
100       } else {
101         auto tensor_data_size = cur_tensor.DataSize();
102         if (size != tensor_data_size) {
103           std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
104                     << std::endl;
105           MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
106           delete[] bin_buf;
107           return RET_ERROR;
108         }
109         auto input_data = cur_tensor.MutableData();
110         if (input_data == nullptr) {
111           MS_LOG(ERROR) << "input_data is nullptr.";
112           return RET_ERROR;
113         }
114         memcpy(input_data, bin_buf, tensor_data_size);
115       }
116       delete[] bin_buf;
117     }
118   }
119   return RET_OK;
120 }
121 
GetDataTypeByTensorName(const std::string & tensor_name)122 int BenchmarkUnifiedApi::GetDataTypeByTensorName(const std::string &tensor_name) {
123   return static_cast<int>(ms_model_.GetOutputByTensorName(tensor_name).DataType());
124 }
125 
InitMSContext(const std::shared_ptr<mindspore::Context> & context)126 void BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context> &context) {
127   context->SetThreadNum(flags_->num_threads_);
128   context->SetEnableParallel(flags_->enable_parallel_);
129   context->SetThreadAffinity(flags_->cpu_bind_mode_);
130   auto &device_list = context->MutableDeviceInfo();
131 
132   if (flags_->device_ == "GPU") {
133     std::shared_ptr<GPUDeviceInfo> gpu_device_info = std::make_shared<GPUDeviceInfo>();
134     gpu_device_info->SetEnableFP16(flags_->enable_fp16_);
135     device_list.push_back(gpu_device_info);
136   }
137 
138   if (flags_->device_ == "NPU") {
139     std::shared_ptr<KirinNPUDeviceInfo> npu_device_info = std::make_shared<KirinNPUDeviceInfo>();
140     npu_device_info->SetFrequency(kFrequencyDefault);
141     device_list.push_back(npu_device_info);
142   }
143 
144   if (flags_->device_ == "Ascend310") {
145     std::shared_ptr<Ascend310DeviceInfo> ascend310_device_info = std::make_shared<Ascend310DeviceInfo>();
146     ascend310_device_info->SetDeviceID(0);
147     device_list.push_back(ascend310_device_info);
148   }
149 
150   if (flags_->device_ == "NNRT") {
151     std::shared_ptr<NNRTDeviceInfo> nnrt_device_info = std::make_shared<NNRTDeviceInfo>();
152     device_list.push_back(nnrt_device_info);
153   }
154 
155   // CPU priority is behind GPU and NPU
156   std::shared_ptr<CPUDeviceInfo> device_info = std::make_shared<CPUDeviceInfo>();
157   device_info->SetEnableFP16(flags_->enable_fp16_);
158   device_list.push_back(device_info);
159 }
160 
CompareOutput()161 int BenchmarkUnifiedApi::CompareOutput() {
162   std::cout << "================ Comparing Output data ================" << std::endl;
163   float total_bias = 0;
164   int total_size = 0;
165   // check the output tensor name.
166   if (this->benchmark_tensor_names_ != ms_model_.GetOutputTensorNames()) {
167     MS_LOG(ERROR) << "The output tensor name is wrong.";
168     return RET_ERROR;
169   }
170   for (const auto &calib_tensor : benchmark_data_) {
171     std::string tensor_name = calib_tensor.first;
172     mindspore::MSTensor tensor = ms_model_.GetOutputByTensorName(tensor_name);
173     if (tensor == nullptr) {
174       MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
175       return RET_ERROR;
176     }
177     int ret;
178     if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
179       std::vector<std::string> output_strings = MSTensor::TensorToStrings(tensor);
180       ret = CompareStringData(tensor_name, calib_tensor.second->strings_data, output_strings);
181     } else {
182       ret = CompareDataGetTotalBiasAndSize(tensor_name, &tensor, &total_bias, &total_size);
183     }
184     if (ret != RET_OK) {
185       MS_LOG(ERROR) << "Error in CompareData";
186       std::cerr << "Error in CompareData" << std::endl;
187       std::cout << "=======================================================" << std::endl << std::endl;
188       return ret;
189     }
190   }
191   float mean_bias;
192   if (total_size != 0) {
193     mean_bias = ((total_bias / float_t(total_size)) * kPercentageDivisor);
194   } else {
195     mean_bias = 0;
196   }
197 
198   std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl;
199   std::cout << "=======================================================" << std::endl << std::endl;
200 
201   if (mean_bias > this->flags_->accuracy_threshold_) {
202     MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
203     std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
204     return RET_ERROR;
205   }
206   return RET_OK;
207 }
208 
CompareDataGetTotalBiasAndSize(const std::string & name,mindspore::MSTensor * tensor,float * total_bias,int * total_size)209 int BenchmarkUnifiedApi::CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor,
210                                                         float *total_bias, int *total_size) {
211   float bias = 0;
212   auto mutableData = tensor->MutableData();
213   if (mutableData == nullptr) {
214     MS_LOG(ERROR) << "mutableData is nullptr.";
215     return RET_ERROR;
216   }
217   switch (static_cast<int>(tensor->DataType())) {
218     case TypeId::kNumberTypeFloat:
219     case TypeId::kNumberTypeFloat32: {
220       bias = CompareData<float, int64_t>(name, tensor->Shape(), mutableData);
221       break;
222     }
223     case TypeId::kNumberTypeInt8: {
224       bias = CompareData<int8_t, int64_t>(name, tensor->Shape(), mutableData);
225       break;
226     }
227     case TypeId::kNumberTypeUInt8: {
228       bias = CompareData<uint8_t, int64_t>(name, tensor->Shape(), mutableData);
229       break;
230     }
231     case TypeId::kNumberTypeInt32: {
232       bias = CompareData<int32_t, int64_t>(name, tensor->Shape(), mutableData);
233       break;
234     }
235     case TypeId::kNumberTypeInt16: {
236       bias = CompareData<int16_t, int64_t>(name, tensor->Shape(), mutableData);
237       break;
238     }
239     case TypeId::kNumberTypeBool: {
240       bias = CompareData<bool, int64_t>(name, tensor->Shape(), mutableData);
241       break;
242     }
243     default:
244       MS_LOG(ERROR) << "Datatype " << static_cast<int>(tensor->DataType()) << " is not supported.";
245       return RET_ERROR;
246   }
247   if (bias < 0) {
248     MS_LOG(ERROR) << "CompareData failed, name: " << name;
249     return RET_ERROR;
250   }
251   *total_bias += bias;
252   *total_size += 1;
253   return RET_OK;
254 }
255 
MarkPerformance()256 int BenchmarkUnifiedApi::MarkPerformance() {
257   MS_LOG(INFO) << "Running warm up loops...";
258   std::cout << "Running warm up loops..." << std::endl;
259   std::vector<MSTensor> outputs;
260 
261   for (int i = 0; i < flags_->warm_up_loop_count_; i++) {
262     auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs);
263     if (status != kSuccess) {
264       MS_LOG(ERROR) << "Inference error ";
265       std::cerr << "Inference error " << std::endl;
266       return RET_ERROR;
267     }
268   }
269 
270   MS_LOG(INFO) << "Running benchmark loops...";
271   std::cout << "Running benchmark loops..." << std::endl;
272   uint64_t time_min = 1000000;
273   uint64_t time_max = 0;
274   uint64_t time_avg = 0;
275 
276   for (int i = 0; i < flags_->loop_count_; i++) {
277     auto inputs = ms_model_.GetInputs();
278     for (auto tensor : inputs) {
279       tensor.MutableData();  // prepare data
280     }
281     auto start = GetTimeUs();
282     auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_);
283     if (status != kSuccess) {
284       MS_LOG(ERROR) << "Inference error ";
285       std::cerr << "Inference error ";
286       return RET_ERROR;
287     }
288 
289     auto end = GetTimeUs();
290     auto time = end - start;
291     time_min = std::min(time_min, time);
292     time_max = std::max(time_max, time);
293     time_avg += time;
294   }
295 
296   if (flags_->time_profiling_) {
297     const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
298     const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
299     PrintResult(per_op_name, op_times_by_name_);
300     PrintResult(per_op_type, op_times_by_type_);
301 #ifdef ENABLE_ARM64
302   } else if (flags_->perf_profiling_) {
303     if (flags_->perf_event_ == "CACHE") {
304       const std::vector<std::string> per_op_name = {"opName", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"};
305       const std::vector<std::string> per_op_type = {"opType", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"};
306       PrintPerfResult(per_op_name, op_perf_by_name_);
307       PrintPerfResult(per_op_type, op_perf_by_type_);
308     } else if (flags_->perf_event_ == "STALL") {
309       const std::vector<std::string> per_op_name = {"opName", "frontend(k)", "frontend(%)", "backendend(k)",
310                                                     "backendend(%)"};
311       const std::vector<std::string> per_op_type = {"opType", "frontend(k)", "frontend(%)", "backendend(k)",
312                                                     "backendend(%)"};
313       PrintPerfResult(per_op_name, op_perf_by_name_);
314       PrintPerfResult(per_op_type, op_perf_by_type_);
315     } else {
316       const std::vector<std::string> per_op_name = {"opName", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"};
317       const std::vector<std::string> per_op_type = {"opType", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"};
318       PrintPerfResult(per_op_name, op_perf_by_name_);
319       PrintPerfResult(per_op_type, op_perf_by_type_);
320     }
321 #endif
322   }
323 
324   if (flags_->loop_count_ > 0) {
325     time_avg /= flags_->loop_count_;
326     MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
327                  << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / kFloatMSEC
328                  << ", MaxRuntime = " << time_max / kFloatMSEC << ", AvgRunTime = " << time_avg / kFloatMSEC;
329     printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n",
330            flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_,
331            time_min / kFloatMSEC, time_max / kFloatMSEC, time_avg / kFloatMSEC);
332   }
333   return RET_OK;
334 }
335 
MarkAccuracy()336 int BenchmarkUnifiedApi::MarkAccuracy() {
337   MS_LOG(INFO) << "MarkAccuracy";
338   std::cout << "MarkAccuracy" << std::endl;
339 
340   auto status = PrintInputData();
341   if (status != RET_OK) {
342     MS_LOG(ERROR) << "PrintInputData error " << status;
343     std::cerr << "PrintInputData error " << status << std::endl;
344     return status;
345   }
346   std::vector<MSTensor> outputs;
347   auto ret = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_);
348   if (ret != kSuccess) {
349     MS_LOG(ERROR) << "Inference error ";
350     std::cerr << "Inference error " << std::endl;
351     return RET_ERROR;
352   }
353   status = ReadCalibData();
354   if (status != RET_OK) {
355     MS_LOG(ERROR) << "Read calib data error " << status;
356     std::cerr << "Read calib data error " << status << std::endl;
357     return status;
358   }
359   status = CompareOutput();
360   if (status != RET_OK) {
361     MS_LOG(ERROR) << "Compare output error " << status;
362     std::cerr << "Compare output error " << status << std::endl;
363     return status;
364   }
365   return RET_OK;
366 }
367 
PrintInputData()368 int BenchmarkUnifiedApi::PrintInputData() {
369   for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
370     auto input = ms_inputs_for_api_[i];
371     MS_ASSERT(input != nullptr);
372     auto tensor_data_type = static_cast<int>(input.DataType());
373 
374     std::cout << "InData" << i << ": ";
375     if (tensor_data_type == TypeId::kObjectTypeString) {
376       std::vector<std::string> output_strings = MSTensor::TensorToStrings(input);
377       size_t print_num = std::min(output_strings.size(), static_cast<size_t>(20));
378       for (size_t j = 0; j < print_num; j++) {
379         std::cout << output_strings[j] << std::endl;
380       }
381       continue;
382     }
383     size_t print_num = std::min(static_cast<int>(input.ElementNum()), kPrintDataNum);
384     const void *in_data = input.MutableData();
385     if (in_data == nullptr) {
386       MS_LOG(ERROR) << "in_data is nullptr.";
387       return RET_ERROR;
388     }
389 
390     for (size_t j = 0; j < print_num; j++) {
391       if (tensor_data_type == TypeId::kNumberTypeFloat32 || tensor_data_type == TypeId::kNumberTypeFloat) {
392         std::cout << static_cast<const float *>(in_data)[j] << " ";
393       } else if (tensor_data_type == TypeId::kNumberTypeInt8) {
394         std::cout << static_cast<const int8_t *>(in_data)[j] << " ";
395       } else if (tensor_data_type == TypeId::kNumberTypeUInt8) {
396         std::cout << static_cast<const uint8_t *>(in_data)[j] << " ";
397       } else if (tensor_data_type == TypeId::kNumberTypeInt32) {
398         std::cout << static_cast<const int32_t *>(in_data)[j] << " ";
399       } else if (tensor_data_type == TypeId::kNumberTypeInt64) {
400         std::cout << static_cast<const int64_t *>(in_data)[j] << " ";
401       } else if (tensor_data_type == TypeId::kNumberTypeBool) {
402         std::cout << static_cast<const bool *>(in_data)[j] << " ";
403       } else {
404         MS_LOG(ERROR) << "Datatype: " << tensor_data_type << " is not supported.";
405         return RET_ERROR;
406       }
407     }
408     std::cout << std::endl;
409   }
410   return RET_OK;
411 }
412 
RunBenchmark()413 int BenchmarkUnifiedApi::RunBenchmark() {
414   auto start_prepare_time = GetTimeUs();
415   // Load graph
416   std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
417 
418   MS_LOG(INFO) << "start reading model file";
419   std::cout << "start reading model file" << std::endl;
420   size_t size = 0;
421   char *graph_buf = ReadFile(flags_->model_file_.c_str(), &size);
422   if (graph_buf == nullptr) {
423     MS_LOG(ERROR) << "Read model file failed while running " << model_name.c_str();
424     std::cerr << "Read model file failed while running " << model_name.c_str() << std::endl;
425     return RET_ERROR;
426   }
427 
428   auto context = std::make_shared<mindspore::Context>();
429   if (context == nullptr) {
430     MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
431     std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
432     return RET_ERROR;
433   }
434 
435   (void)InitMSContext(context);
436 
437   if (!flags_->config_file_.empty()) {
438     auto config_ret = ms_model_.LoadConfig(flags_->config_file_);
439     if (config_ret != kSuccess) {
440       MS_LOG(ERROR) << "ms_model_.LoadConfig failed while running ", model_name.c_str();
441       std::cout << "ms_model_.LoadConfig failed while running ", model_name.c_str();
442     }
443   }
444 
445   auto ret = ms_model_.Build(graph_buf, size, kMindIR, context);
446   delete[] graph_buf;
447   if (ret != kSuccess) {
448     MS_LOG(ERROR) << "ms_model_.Build failed while running ", model_name.c_str();
449     std::cout << "ms_model_.Build failed while running ", model_name.c_str();
450     return RET_ERROR;
451   }
452 
453   if (!flags_->resize_dims_.empty()) {
454     std::vector<std::vector<int64_t>> resize_dims;
455     (void)std::transform(flags_->resize_dims_.begin(), flags_->resize_dims_.end(), std::back_inserter(resize_dims),
456                          [&](auto &shapes) { return this->ConverterToInt64Vector<int>(shapes); });
457 
458     ret = ms_model_.Resize(ms_model_.GetInputs(), resize_dims);
459     if (ret != kSuccess) {
460       MS_LOG(ERROR) << "Input tensor resize failed.";
461       std::cout << "Input tensor resize failed.";
462       return RET_ERROR;
463     }
464   }
465 
466   ms_inputs_for_api_ = ms_model_.GetInputs();
467   auto end_prepare_time = GetTimeUs();
468   MS_LOG(INFO) << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms";
469   std::cout << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms" << std::endl;
470 
471   // Load input
472   MS_LOG(INFO) << "start generate input data";
473   auto status = LoadInput();
474   if (status != 0) {
475     MS_LOG(ERROR) << "Generate input data error";
476     return status;
477   }
478   if (!flags_->benchmark_data_file_.empty()) {
479     status = MarkAccuracy();
480     if (status != 0) {
481       MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
482       std::cout << "Run MarkAccuracy error: " << status << std::endl;
483       return status;
484     }
485   } else {
486     status = MarkPerformance();
487     if (status != 0) {
488       MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
489       std::cout << "Run MarkPerformance error: " << status << std::endl;
490       return status;
491     }
492   }
493   if (flags_->dump_tensor_data_) {
494     std::cout << "Dumped file is saved to : " + dump_file_output_dir_ << std::endl;
495   }
496   return RET_OK;
497 }
498 
InitTimeProfilingCallbackParameter()499 int BenchmarkUnifiedApi::InitTimeProfilingCallbackParameter() {
500   // before callback
501   ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
502                              const std::vector<mindspore::MSTensor> &before_outputs,
503                              const MSCallBackParam &call_param) {
504     if (before_inputs.empty()) {
505       MS_LOG(INFO) << "The num of beforeInputs is empty";
506     }
507     if (before_outputs.empty()) {
508       MS_LOG(INFO) << "The num of beforeOutputs is empty";
509     }
510     if (op_times_by_type_.find(call_param.node_type) == op_times_by_type_.end()) {
511       op_times_by_type_.insert(std::make_pair(call_param.node_type, std::make_pair(0, 0.0f)));
512     }
513     if (op_times_by_name_.find(call_param.node_name) == op_times_by_name_.end()) {
514       op_times_by_name_.insert(std::make_pair(call_param.node_name, std::make_pair(0, 0.0f)));
515     }
516 
517     op_call_times_total_++;
518     op_begin_ = GetTimeUs();
519     return true;
520   };
521 
522   // after callback
523   ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
524                             const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
525     uint64_t opEnd = GetTimeUs();
526 
527     if (after_inputs.empty()) {
528       MS_LOG(INFO) << "The num of after inputs is empty";
529     }
530     if (after_outputs.empty()) {
531       MS_LOG(INFO) << "The num of after outputs is empty";
532     }
533 
534     float cost = static_cast<float>(opEnd - op_begin_) / kFloatMSEC;
535     if (flags_->device_ == "GPU") {
536       auto gpu_param = reinterpret_cast<const GPUCallBackParam &>(call_param);
537       cost = static_cast<float>(gpu_param.execute_time);
538     }
539     op_cost_total_ += cost;
540     op_times_by_type_[call_param.node_type].first++;
541     op_times_by_type_[call_param.node_type].second += cost;
542     op_times_by_name_[call_param.node_name].first++;
543     op_times_by_name_[call_param.node_name].second += cost;
544     return true;
545   };
546   return RET_OK;
547 }
548 
InitPerfProfilingCallbackParameter()549 int BenchmarkUnifiedApi::InitPerfProfilingCallbackParameter() {
550 #ifndef ENABLE_ARM64
551   MS_LOG(ERROR) << "Only support perf_profiling on arm64.";
552   return RET_ERROR;
553 #else
554   struct perf_event_attr pe, pe2;
555   memset(&pe, 0, sizeof(struct perf_event_attr));
556   memset(&pe2, 0, sizeof(struct perf_event_attr));
557   pe.type = PERF_TYPE_HARDWARE;
558   pe2.type = PERF_TYPE_HARDWARE;
559   pe.size = sizeof(struct perf_event_attr);
560   pe2.size = sizeof(struct perf_event_attr);
561   pe.disabled = 1;
562   pe2.disabled = 1;
563   pe.exclude_kernel = 1;   // don't count kernel
564   pe2.exclude_kernel = 1;  // don't count kernel
565   pe.exclude_hv = 1;       // don't count hypervisor
566   pe2.exclude_hv = 1;      // don't count hypervisor
567   pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
568   pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
569   if (flags_->perf_event_ == "CACHE") {
570     pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
571     pe2.config = PERF_COUNT_HW_CACHE_MISSES;
572   } else if (flags_->perf_event_ == "STALL") {
573     pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
574     pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
575   } else {
576     pe.config = PERF_COUNT_HW_CPU_CYCLES;
577     pe2.config = PERF_COUNT_HW_INSTRUCTIONS;
578   }
579   perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0);
580   if (perf_fd == -1) {
581     MS_LOG(ERROR) << "Failed to open perf event " << pe.config;
582     return RET_ERROR;
583   }
584   perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0);
585   if (perf_fd2 == -1) {
586     MS_LOG(ERROR) << "Failed to open perf event " << pe2.config;
587     return RET_ERROR;
588   }
589   struct PerfCount zero;
590   zero.value[0] = 0;
591   zero.value[1] = 0;
592   // before callback
593   ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
594                              const std::vector<mindspore::MSTensor> &before_outputs,
595                              const MSCallBackParam &call_param) {
596     if (before_inputs.empty()) {
597       MS_LOG(INFO) << "The num of beforeInputs is empty";
598     }
599     if (before_outputs.empty()) {
600       MS_LOG(INFO) << "The num of beforeOutputs is empty";
601     }
602     if (op_perf_by_type_.find(call_param.node_type) == op_perf_by_type_.end()) {
603       op_perf_by_type_.insert(std::make_pair(call_param.node_type, std::make_pair(0, zero)));
604     }
605     if (op_perf_by_name_.find(call_param.node_name) == op_perf_by_name_.end()) {
606       op_perf_by_name_.insert(std::make_pair(call_param.node_name, std::make_pair(0, zero)));
607     }
608 
609     op_call_times_total_++;
610     ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
611     ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
612     return true;
613   };
614 
615   // after callback
616   ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
617                             const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
618     struct PerfResult res;
619     ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
620     if (read(perf_fd, &res, sizeof(struct PerfResult)) == -1) {
621       MS_LOG(ERROR) << "Failed to read perf_fd";
622       return false;
623     }
624 
625     if (after_inputs.empty()) {
626       MS_LOG(INFO) << "The num of after inputs is empty";
627     }
628     if (after_outputs.empty()) {
629       MS_LOG(INFO) << "The num of after outputs is empty";
630     }
631     float cost1 = static_cast<float>(res.values[0].value);
632     float cost2 = static_cast<float>(res.values[1].value);
633     op_cost_total_ += cost1;
634     op_cost2_total_ += cost2;
635     op_perf_by_type_[call_param.node_type].first++;
636     op_perf_by_type_[call_param.node_type].second.value[0] += cost1;
637     op_perf_by_type_[call_param.node_type].second.value[1] += cost2;
638     op_perf_by_name_[call_param.node_name].first++;
639     op_perf_by_name_[call_param.node_name].second.value[0] += cost1;
640     op_perf_by_name_[call_param.node_name].second.value[1] += cost2;
641     return true;
642   };
643 #endif
644   return RET_OK;
645 }
646 
647 namespace {
648 template <typename T>
DataToString(void * data,size_t data_number)649 std::string DataToString(void *data, size_t data_number) {
650   if (data == nullptr) {
651     return "Data of tensor is nullptr";
652   }
653   std::ostringstream oss;
654   auto casted_data = static_cast<T *>(data);
655   for (size_t i = 0; i < kDataToStringMaxNum && i < data_number; i++) {
656     oss << " " << casted_data[i];
657   }
658   return oss.str();
659 }
660 
DumpMSTensor(mindspore::MSTensor * tensor)661 std::string DumpMSTensor(mindspore::MSTensor *tensor) {
662   if (tensor == nullptr) {
663     return "Tensor is nullptr";
664   }
665   std::ostringstream oss;
666   oss << " DataType: " << static_cast<int>(tensor->DataType());
667   oss << " Shape:";
668   for (auto &dim : tensor->Shape()) {
669     oss << " " << dim;
670   }
671   oss << std::endl << " Data:";
672   switch (static_cast<int>(tensor->DataType())) {
673     case kNumberTypeFloat32: {
674       oss << DataToString<float>(tensor->MutableData(), tensor->ElementNum());
675     } break;
676     case kNumberTypeFloat16: {
677       oss << DataToString<int16_t>(tensor->MutableData(), tensor->ElementNum());
678     } break;
679     case kNumberTypeInt32: {
680       oss << DataToString<int32_t>(tensor->MutableData(), tensor->ElementNum());
681     } break;
682     case kNumberTypeInt16: {
683       oss << DataToString<int16_t>(tensor->MutableData(), tensor->ElementNum());
684     } break;
685     case kNumberTypeInt8: {
686       oss << DataToString<int8_t>(tensor->MutableData(), tensor->ElementNum());
687     } break;
688     default:
689       oss << "Unsupported data type to print";
690       break;
691   }
692   return oss.str();
693 }
694 #ifndef BENCHMARK_CLIP_JSON
GenerateOutputFileName(mindspore::MSTensor * tensor,const std::string & op_name,const std::string & file_type,const size_t & idx)695 std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name,
696                                    const std::string &file_type, const size_t &idx) {
697   std::string file_name = op_name;
698   auto pos = file_name.find_first_of('/');
699   while (pos != std::string::npos) {
700     file_name.replace(pos, 1, ".");
701     pos = file_name.find_first_of('/');
702   }
703   file_name += "_" + file_type + "_" + std::to_string(idx) + "_shape_";
704   for (const auto &dim : tensor->Shape()) {
705     file_name += std::to_string(dim) + "_";
706   }
707   if (kTypeIdMap.find(static_cast<int>(tensor->DataType())) != kTypeIdMap.end()) {
708     file_name += kTypeIdMap.at(static_cast<int>(tensor->DataType()));
709   }
710 
711   auto tensor_format = tensor->format();
712   if (kTensorFormatMap.find(tensor_format) != kTensorFormatMap.end()) {
713     file_name += "_" + kTensorFormatMap.at(tensor_format) + ".bin";
714   }
715 
716   file_name += +".bin";
717   return file_name;
718 }
719 #endif
720 }  // namespace
721 
InitPrintTensorDataCallbackParameter()722 int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() {
723   // before callback
724   ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
725                              const std::vector<mindspore::MSTensor> &before_outputs,
726                              const MSCallBackParam &call_param) { return true; };
727 
728   // after callback
729   ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
730                             const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
731     std::cout << "================================================================" << std::endl;
732     std::cout << call_param.node_name << " inputs : " << std::endl;
733     for (auto ms_tensor : after_inputs) {
734       std::cout << DumpMSTensor(&ms_tensor) << std::endl;
735     }
736     std::cout << "----------------------------------------------------------------" << std::endl;
737     std::cout << call_param.node_name << " outputs : " << std::endl;
738     for (auto ms_tensor : after_outputs) {
739       std::cout << DumpMSTensor(&ms_tensor) << std::endl;
740     }
741     std::cout << "================================================================" << std::endl;
742     return true;
743   };
744   return RET_OK;
745 }
InitDumpTensorDataCallbackParameter()746 int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() {
747 #ifndef BENCHMARK_CLIP_JSON
748   // before callback
749   ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
750                              const std::vector<mindspore::MSTensor> &before_outputs,
751                              const MSCallBackParam &call_param) {
752     auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get<int>();
753     auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get<int>();
754     auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get<std::vector<std::string>>();
755     if (dump_mode == 0 || std::find(kernels.begin(), kernels.end(), call_param.node_name) != kernels.end()) {
756       if (input_output_mode == 0 || input_output_mode == 1) {
757         for (size_t i = 0; i < before_inputs.size(); i++) {
758           auto ms_tensor = before_inputs.at(i);
759           auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name, "input", i);
760           auto abs_file_path = dump_file_output_dir_ + "/" + file_name;
761           if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) {  // save to file
762             MS_LOG(ERROR) << "write tensor data to file failed.";
763             return false;
764           }
765         }
766       }
767     }
768     return true;
769   };
770 
771   // after callback
772   ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
773                             const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
774     auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get<int>();
775     auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get<int>();
776     auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get<std::vector<std::string>>();
777     if (dump_mode == kDumpInputsAndOutputs ||
778         std::find(kernels.begin(), kernels.end(), call_param.node_name) != kernels.end()) {
779       if (input_output_mode == kDumpInputsAndOutputs || input_output_mode == kDumpOutputs) {
780         for (size_t i = 0; i < after_outputs.size(); i++) {
781           auto ms_tensor = after_outputs.at(i);
782           auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name, "output", i);
783           auto abs_file_path = dump_file_output_dir_ + "/" + file_name;
784           if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) {  // save to file
785             MS_LOG(ERROR) << "write tensor data to file failed.";
786             return false;
787           }
788         }
789       }
790     }
791     return true;
792   };
793 #endif
794   return RET_OK;
795 }
796 
~BenchmarkUnifiedApi()797 BenchmarkUnifiedApi::~BenchmarkUnifiedApi() {}
798 }  // namespace lite
799 }  // namespace mindspore
800