• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_H_
18 #define MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_H_
19 
20 #include <getopt.h>
21 #include <signal.h>
22 #include <unordered_map>
23 #include <fstream>
24 #include <iostream>
25 #include <map>
26 #include <cmath>
27 #include <string>
28 #include <vector>
29 #include <memory>
30 #include <cfloat>
31 #include <utility>
32 #include <algorithm>
33 
34 #include "tools/common/flag_parser.h"
35 #include "src/common/file_utils.h"
36 #include "src/common/utils.h"
37 #include "include/lite_session.h"
38 
39 namespace mindspore::lite {
// Format of the benchmark input-data file selected on the command line.
enum MS_API DataType { kImage = 0, kBinary = 1 };

// Tolerances used by NetTrain::CompareData when comparing model output
// against reference data (pass when |out - ref| <= atol + rtol * |ref|).
constexpr float relativeTolerance = 1e-5;
constexpr float absoluteTolerance = 1e-8;
44 
// Sums the first |size| elements of |data|, interpreted as T, accumulating
// in float. Used for debug checksums of layer outputs.
template <typename T>
float TensorSum(void *data, int size) {
  const T *values = static_cast<const T *>(data);
  float total = 0.0f;
  for (const T *p = values; p != values + size; ++p) {
    total += static_cast<float>(*p);
  }
  return total;
}
54 
55 class MS_API NetTrainFlags : public virtual FlagParser {
56  public:
NetTrainFlags()57   NetTrainFlags() {
58     // common
59     AddFlag(&NetTrainFlags::model_file_, "modelFile", "Input model file", "");
60     AddFlag(&NetTrainFlags::bb_model_file_, "bbModelFile", "Backboine model for transfer session", "");
61     AddFlag(&NetTrainFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
62     // MarkPerformance
63     AddFlag(&NetTrainFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 0);
64     AddFlag(&NetTrainFlags::time_profiling_, "timeProfiling", "Run time profiling", false);
65     AddFlag(&NetTrainFlags::epochs_, "epochs", "Number of training epochs to run", 1);
66     AddFlag(&NetTrainFlags::num_threads_, "numThreads", "Run threads number", 1);
67     // MarkAccuracy
68     AddFlag(&NetTrainFlags::data_file_, "expectedDataFile", "Expected results data file path", "");
69     AddFlag(&NetTrainFlags::export_file_, "exportFile", "MS File to export trained model into", "");
70     AddFlag(&NetTrainFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5);
71     AddFlag(&NetTrainFlags::layer_checksum_, "layerCheckSum", "layer output checksum print (debug)", false);
72     AddFlag(&NetTrainFlags::enable_fp16_, "enableFp16", "Enable float16", false);
73     AddFlag(&NetTrainFlags::loss_name_, "lossName", "loss layer name", "");
74     AddFlag(&NetTrainFlags::inference_file_, "inferenceFile", "MS file to export inference model", "");
75     AddFlag(&NetTrainFlags::virtual_batch_, "virtualBatch", "use virtual batch", false);
76     AddFlag(&NetTrainFlags::resize_dims_in_, "inputShapes",
77             "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", "");
78     AddFlag(&NetTrainFlags::is_raw_mix_precision_, "isRawMixPrecision",
79             "If model is mix precision export from MindSpore,please set true", false);
80   }
81 
82   ~NetTrainFlags() override = default;
83   void InitResizeDimsList();
84 
85  public:
86   // common
87   std::string model_file_;
88   std::string in_data_file_;
89   std::string bb_model_file_;
90   std::vector<std::string> input_data_list_;
91   DataType in_data_type_;
92   std::string in_data_type_in_ = "bin";
93   int cpu_bind_mode_ = 1;
94   bool enable_fp16_ = false;
95   bool virtual_batch_ = false;
96   // MarkPerformance
97   int num_threads_ = 1;
98   int warm_up_loop_count_ = 0;
99   bool time_profiling_;
100   int epochs_ = 1;
101   // MarkAccuracy
102   std::string data_file_;
103   std::string data_type_ = "FLOAT";
104   float accuracy_threshold_;
105   // Resize
106   std::string export_file_ = "";
107   std::string resize_dims_in_ = "";
108   bool layer_checksum_ = false;
109   std::vector<std::vector<int>> resize_dims_;
110   std::string loss_name_ = "";
111   std::string inference_file_ = "";
112   bool is_raw_mix_precision_ = false;
113 };
114 
// Benchmark driver: builds train/inference LiteSessions from the parsed
// flags, feeds input data, measures performance (MarkPerformance) and
// compares outputs against reference data (MarkAccuracy).
class MS_API NetTrain {
 public:
  // Non-owning: caller keeps ownership of |flags| for the NetTrain lifetime.
  explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {}
  virtual ~NetTrain() = default;

  int Init();
  int RunNetTrain();

 private:
  // call GenerateInputData or ReadInputFile to init inputTensors
  int LoadInput(Vector<tensor::MSTensor *> *ms_inputs);
  // Prints a debug checksum for |tensor| (used with the layerCheckSum flag).
  void CheckSum(mindspore::tensor::MSTensor *tensor, std::string node_type, int id, std::string in_out);
  // call GenerateRandomData to fill inputTensors
  int GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs);

  int GenerateRandomData(size_t size, void *data);

  int ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs);
  // Builds a session from |filename| (plus backbone |bb_filename| for
  // transfer sessions), runs |epochs| iterations and optionally checks
  // accuracy against the expected-data file.
  int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, int train_session, int epochs,
                          bool check_accuracy = true);

  std::unique_ptr<session::LiteSession> CreateAndRunNetworkForInference(const std::string &filename,
                                                                        const Context &context);

  std::unique_ptr<session::LiteSession> CreateAndRunNetworkForTrain(const std::string &filename,
                                                                    const std::string &bb_filename,
                                                                    const Context &context, const TrainCfg &train_cfg,
                                                                    int epochs);

  // Installs before/after kernel callbacks used for time profiling.
  int InitCallbackParameter();

  int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);

  // Prints the first (up to 20) elements of |input| for debugging.
  template <typename T>
  void PrintInputData(tensor::MSTensor *input) {
    MS_ASSERT(input != nullptr);
    // Persists across calls so successive inputs get increasing labels.
    static int i = 0;
    auto inData = reinterpret_cast<T *>(input->MutableData());
    size_t tensorSize = input->ElementsNum();
    size_t len = (tensorSize < 20) ? tensorSize : 20;
    std::cout << "InData" << i++ << ": ";
    for (size_t j = 0; j < len; j++) {
      std::cout << inData[j] << " ";
    }
    std::cout << std::endl;
  }

  // tensorData need to be converter first
  // Compares |msTensorData| against |refOutput| element-wise and returns the
  // mean relative bias over elements that exceed the configured tolerances.
  // NOTE(review): on nan/inf this returns RET_ERROR converted to float from a
  // float-returning function — callers appear to compare the result against a
  // threshold; confirm RET_ERROR's value makes that comparison fail as intended.
  template <typename T>
  float CompareData(const float *refOutput, int size, T *msTensorData) {
    size_t errorCount = 0;
    float meanError = 0;
    // Dump (up to) the first 50 elements of both tensors for inspection.
    std::cout << "Data of model output: ";
    for (int j = 0; j < std::min(50, size); j++) {
      std::cout << static_cast<float>(msTensorData[j]) << " ";
    }
    std::cout << std::endl;
    std::cout << "Data of Ref output  : ";
    for (int j = 0; j < std::min(50, size); j++) {
      std::cout << refOutput[j] << " ";
    }
    std::cout << std::endl;
    for (int j = 0; j < size; j++) {
      if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) {
        std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl;
        MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail";
        return RET_ERROR;
      }

      // Element passes when |out - ref| <= atol + rtol * |ref|.
      auto tolerance = absoluteTolerance + relativeTolerance * fabs(refOutput[j]);
      auto absoluteError = std::fabs(static_cast<float>(msTensorData[j]) - refOutput[j]);
      if (absoluteError > tolerance) {
        if (fabs(refOutput[j]) == 0) {
          // Reference is exactly zero: relative error is undefined, so count
          // the absolute error instead (only when it is non-negligible).
          if (absoluteError > 1e-5) {
            meanError += absoluteError;
            errorCount++;
          } else {
            continue;
          }
        } else {
          // just assume that atol = rtol
          // FLT_MIN guards the division against underflowed denominators.
          meanError += absoluteError / (fabs(refOutput[j]) + FLT_MIN);
          errorCount++;
        }
      }
    }
    std::cout << std::endl;
    if (meanError > 0.0f) {
      meanError /= errorCount;
    }

    // Treat a mean bias below 1e-7 as an exact match for reporting purposes.
    if (meanError <= 0.0000001) {
      std::cout << "Mean bias of tensor: 0%" << std::endl;
    } else {
      std::cout << "Mean bias of tensor: " << meanError * 100 << "%" << std::endl;
    }
    return meanError;
  }

  int MarkPerformance(const std::unique_ptr<session::LiteSession> &session);
  int MarkAccuracy(const std::unique_ptr<session::LiteSession> &session, bool enforce_accuracy = true);
  int CompareOutput(const session::LiteSession &lite_session);
  int SaveModels(const std::unique_ptr<session::LiteSession> &session);
  int CheckExecutionOfSavedModels();
  // Reports (only) the first NaN found in |data|, for debugging.
  void TensorNan(float *data, int size) {
    for (int i = 0; i < size; i++) {
      if (std::isnan(data[i])) {
        std::cout << "nan value of index=" << i << std::endl;
        break;
      }
    }
  }
  NetTrainFlags *flags_;  // non-owning; set in the constructor

  // callback parameters
  uint64_t op_begin_ = 0;          // timestamp of the op currently executing
  int op_call_times_total_ = 0;
  float op_cost_total_ = 0.0f;
  std::map<std::string, std::pair<int, float>> op_times_by_type_;  // type -> (calls, total time)
  std::map<std::string, std::pair<int, float>> op_times_by_name_;  // name -> (calls, total time)

  mindspore::KernelCallBack before_call_back_;
  mindspore::KernelCallBack after_call_back_;
};
239 
// Benchmark entry point: parses command-line flags and runs the training
// benchmark; returns a status code (0 on success).
int MS_API RunNetTrain(int argc, const char **argv);
241 }  // namespace mindspore::lite
242 #endif  // MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_H_
243