/**
 * Copyright 2021-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tools/converter/quantizer/parameter_tunner.h"
#include <set>
#include <functional>
#include <map>
#include <memory>
#include <vector>
#include <algorithm>
#include <cmath>  // std::sqrt
#include "tools/converter/preprocess/cv_calib_data.h"
#include "tools/converter/preprocess/image_preprocess.h"
#ifdef MSLITE_DEPS_OPENCV
#include "tools/converter/preprocess/opencv_utils.h"
#endif
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/converter/quantizer/weight_quantizer.h"
#include "tools/converter/export_model.h"
#include "tools/common/tensor_util.h"
#include "tools/converter/parser/parser_utils.h"

namespace mindspore::lite::quant {
namespace {
static const int kOneChannel = 1;
static const int kThreeChannels = 3;
static const int kSixChannels = 6;
static const int kDefaultRangeStart = 1;
static const int kDefaultRangeEnd = 30000;
static const int kDefaultExtendFactor = 4;
static const int kMaxStepCoarseSearch = 173;
static const int kMaxStepFineSearch = 20;
static const int kMaxFineSearchIterations = 3;
static const int kFinestGranularity = 2;
static const float kCompressToInt8Ratio = 4.0f;
static const int kTwo = 2;
static const float kScaleFactor = (0.01 * 0.01 * 0.01 * 24.0);
}  // namespace
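// Estimates the minimal useful quantization scale from the largest quantized
// weight tensor; its reciprocal is used below to bound the scale search range.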
float GetMinScale(const std::set<tensor::TensorPtr> &weight_quantized_tensors) {
  size_t max_tensor_size = 1;
  for (const auto &tensor : weight_quantized_tensors) {
    max_tensor_size = std::max(max_tensor_size, tensor->DataSize());
  }
  return (max_tensor_size > 0) ? std::sqrt(kScaleFactor / max_tensor_size) : kScaleFactor;
}

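// Resizes every model input so that its batch dimension (dim 0) equals `batch`,
// allowing several calibration images to be fed in a single Predict() call.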
static Status ExtendBatchSize(const std::shared_ptr<mindspore::Model> &model, std::vector<MSTensor> *inputs,
                              int batch) {
  std::vector<std::vector<int64_t>> dims(inputs->size());
  int i = 0;
  for (auto input : *inputs) {
    dims.at(i) = input.Shape();
    dims.at(i).at(0) = batch;
    input.SetShape(dims.at(i));
    i++;
  }
  return model->Resize(*inputs, dims);
}

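// Copies the input data of the float ("origin") model into the weight-quantized
// model and runs inference on it, so both models see identical inputs.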
int ParameterOptimizer::CopyDataAndRun(const std::shared_ptr<mindspore::Model> &origin_model,
                                       const std::shared_ptr<mindspore::Model> &quant_model) {
  auto weight_quant_inputs = quant_model->GetInputs();
  for (auto input : weight_quant_inputs) {
    auto origin_tensor = origin_model->GetInputByTensorName(input.Name());
    auto weight_quant_tensor_data = input.MutableData();
    if (memcpy_s(weight_quant_tensor_data, input.DataSize(), origin_tensor.MutableData(), origin_tensor.DataSize()) !=
        EOK) {
      MS_LOG(ERROR) << "memcpy data failed.";
      return RET_ERROR;
    }
  }
  auto weight_quant_outputs = quant_model->GetOutputs();
  auto model_status = quant_model->Predict(weight_quant_inputs, &weight_quant_outputs);
  if (model_status != kSuccess) {
    MS_LOG(ERROR) << "Run weight-quantized model predict failed.";
    return RET_ERROR;
  }

  return RET_OK;
}

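// Scans the scale range in s_param: each candidate quantizes a clone of the graph
// with init_scale = 1/scale, runs it on the origin model's inputs, and the first
// candidate whose outputs keep cosine similarity >= threshold with the float
// outputs is accepted, along with its compression ratio.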
int ParameterOptimizer::WeightQuantModelInference(const FuncGraphPtr &func_graph,
                                                  const std::shared_ptr<ConverterPara> &param,
                                                  const std::shared_ptr<mindspore::Model> &origin_model,
                                                  size_t origin_model_size, SearchParams *s_param, int *ret_scale,
                                                  float *best_compress_ratio, bool *found_valid_scale) {
  CHECK_NULL_RETURN(param);
  CHECK_NULL_RETURN(origin_model);
  CHECK_NULL_RETURN(ret_scale);
  CHECK_NULL_RETURN(best_compress_ratio);
  CHECK_NULL_RETURN(found_valid_scale);
  CHECK_NULL_RETURN(s_param);
  const float threshold = 0.995f;
  *best_compress_ratio = 0.0f;
  *found_valid_scale = false;
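  // Walk the candidate scales from range_start to range_end with the given step;
  // each iteration quantizes and evaluates one candidate.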
  for (int scale = s_param->range_start; scale <= s_param->range_end; scale += s_param->step) {
    param->commonQuantParam.quant_type = quant::QUANT_WEIGHT;
    FuncGraphPtr func_graph_bak;
    auto ret = CloneFuncGraph(func_graph, param, &func_graph_bak);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Clone FuncGraph failed.";
      return ret;
    }

    auto quantizer = std::make_unique<quant::WeightQuantizer>(param, 1.0f / scale);
    CHECK_NULL_RETURN(quantizer);
    auto status = quantizer->DoQuantize(func_graph_bak);
    if (status != RET_OK) {
      MS_LOG(WARNING) << "DoQuantization failed " << status;
      continue;
    }

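    // On the first candidate, extend range_end when the reciprocal of the minimal
    // admissible weight scale falls beyond the current search range.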
    if (scale == 1) {
      float inv_min_scale = GetMinScale(quantizer->GetWeightQuantizedTensors());
      if (inv_min_scale != 0) {
        if ((1.0 / inv_min_scale) > s_param->range_end) {
          // extend scale end
          int num_of_steps = (s_param->range_end - s_param->range_start) / s_param->step;
          s_param->range_end = static_cast<int>(1.0 / inv_min_scale);
          s_param->step = s_param->range_end / num_of_steps;
        }
      }
      std::cout << "=== Basic search in range [1," << s_param->range_end << "] === " << s_param->step << "\n";
    }

    MS_LOG(INFO) << "create quant session";
    size_t weight_quant_size = 0;
    auto weight_quant_model = std::make_shared<mindspore::Model>();
    CHECK_NULL_RETURN(weight_quant_model);
    auto build_status = BuildModelByFuncGraph(weight_quant_model, func_graph_bak, param, &weight_quant_size);
    if (build_status != kSuccess) {
      MS_LOG(WARNING) << "build model failed!";
      continue;
    }
    auto weight_quant_inputs = weight_quant_model->GetInputs();
    if ((param->dataPreProcessParam.calibrate_size == 0) && (param->mixedBitWeightQuantParam.use_cv_data)) {
      if (ExtendBatchSize(weight_quant_model, &weight_quant_inputs, kNumOfCalibrationImages) != kSuccess) {
        MS_LOG(ERROR) << "Resize session for CV calibration failed!";
        return RET_ERROR;
      }
    }
    auto model_status = CopyDataAndRun(origin_model, weight_quant_model);
    if (model_status != RET_OK) {
      MS_LOG(ERROR) << "Copy input data to model failed.";
      return RET_ERROR;
    }
    auto cos_sim = CompareDataByCosineDistance<float>(origin_model, weight_quant_model);
    MS_CHECK_TRUE_MSG(weight_quant_size > 0, RET_ERROR, "weight quant size must be larger than 0");
    const auto compress_ratio = 1.0 * origin_model_size / weight_quant_size;
    std::cout << " scale:" << scale << " cos_sim:" << cos_sim << " compression ratio:" << compress_ratio << std::endl;
    if (cos_sim >= threshold) {
      if (compress_ratio > *best_compress_ratio) {
        *best_compress_ratio = compress_ratio;
        *found_valid_scale = true;
        *ret_scale = scale;
        return RET_OK;
      }
    }
  }
  *found_valid_scale = false;
  MS_LOG(DEBUG) << "Couldn't reach cosine similarity constraint";
  return RET_OK;
}

#ifdef MSLITE_DEPS_OPENCV
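// Decodes one embedded calibration image, resizes and normalizes it to the input
// shape (NHWC), and writes the result into out_buf; *out_len is updated to the
// number of bytes actually written.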
static int PrepareSingleImage(const uint8_t *buf, int len, const std::vector<int64_t> &shape, uint8_t *out_buf,
                              size_t *out_len) {
  cv::Mat mat;
  const int HEIGHT_INDEX = 1;
  const int WIDTH_INDEX = 2;
  const std::vector<double> mean = {127.5, 127.5, 127.5};
  const std::vector<double> standard_deviation = {127.5, 127.5, 127.5};
  auto ret = preprocess::DecodeBuffer(buf, len, &mat);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PrepareSingleImage error in decode: " << ret;
    return ret;
  }
  ret = preprocess::Resize(&mat, shape.at(WIDTH_INDEX), shape.at(HEIGHT_INDEX), cv::INTER_LINEAR);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PrepareSingleImage error in Resize: " << ret;
    return ret;
  }
  ret = preprocess::Normalize(&mat, mean, standard_deviation);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PrepareSingleImage error in Normalize: " << ret;
    return ret;
  }
  if (shape.at(kNHWC_C) == kOneChannel) {
    ret = preprocess::ConvertImageFormat(&mat, cv::COLOR_BGR2GRAY);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "PrepareSingleImage error in Gray Scaling: " << ret;
      return ret;
    }
  }
  if (shape.at(kNHWC_C) == kSixChannels) {  // in case of 2 stacked images
    std::vector<cv::Mat> channels(kThreeChannels);
    cv::split(mat, channels);
    std::vector<cv::Mat> mat_6_channels;
    for (int i = 0; i < kThreeChannels; i++) {
      mat_6_channels.push_back(channels[i]);
    }
    for (int i = 0; i < kThreeChannels; i++) {
      mat_6_channels.push_back(channels[i]);
    }
    cv::merge(mat_6_channels, mat);
  }
  uint8_t *data = nullptr;
  size_t size = 0;
  ret = preprocess::GetMatData(mat, reinterpret_cast<void **>(&data), &size);
  if (data == nullptr || size == 0) {
    MS_LOG(ERROR) << "GetMatData data is nullptr or size == 0";
    return RET_ERROR;
  }
  if (ret != RET_OK) {
    delete[] data;
    MS_LOG(ERROR) << "Get mat data failed.";
    return ret;
  }

  if (size > *out_len) {
    delete[] data;
    MS_LOG(ERROR) << "Buffer Size mismatch " << size << " vs " << *out_len;
    return RET_ERROR;
  }
  std::copy(data, data + size, out_buf);
  delete[] data;
  *out_len = size;
  return RET_OK;
}

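// Fills a model input tensor with the embedded COCO calibration images, writing
// one preprocessed image per batch entry.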
static int GenerateCvData(mindspore::MSTensor *tensor) {
  MS_ASSERT(tensor != nullptr);
  auto input_data = tensor->MutableData();
  if (input_data == nullptr) {
    MS_LOG(ERROR) << "MallocData for tensor failed";
    return RET_ERROR;
  }

  const int num_of_images = kNumOfCalibrationImages;
  const uint8_t *ims[num_of_images] = {COCO_train2014_0581821, COCO_train2014_0581882, COCO_train2014_0581909};
  int im_sizes[num_of_images] = {sizeof(COCO_train2014_0581821), sizeof(COCO_train2014_0581882),
                                 sizeof(COCO_train2014_0581909)};
  uint8_t *t_data = reinterpret_cast<uint8_t *>(tensor->MutableData());
  size_t t_size = tensor->DataSize();
  size_t loc = 0;

  for (int i = 0; i < num_of_images; i++) {
    size_t o_size = t_size - loc;
    auto ret = PrepareSingleImage(ims[i], im_sizes[i], tensor->Shape(), t_data + loc, &o_size);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Preparing Single image error";
      return ret;
    }
    loc += o_size;
  }
  return RET_OK;
}
#endif

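// Builds and runs the non-quantized model once, recording its size and producing
// the reference outputs that the quantized candidates are compared against.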
int ParameterOptimizer::OriginModelInference(const FuncGraphPtr &func_graph,
                                             const std::shared_ptr<ConverterPara> &param,
                                             const std::shared_ptr<mindspore::Model> &origin_model,
                                             size_t *origin_model_size) {
  CHECK_NULL_RETURN(param);
  CHECK_NULL_RETURN(origin_model);
  CHECK_NULL_RETURN(origin_model_size);
  FuncGraphPtr func_graph_bak;
  auto ret = CloneFuncGraph(func_graph, param, &func_graph_bak);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Clone FuncGraph failed.";
    return RET_ERROR;
  }
  param->commonQuantParam.quant_type = quant::QUANT_NONE;
  *origin_model_size = 0;
  auto status = BuildModelByFuncGraph(origin_model, func_graph_bak, param, origin_model_size);
  if (status != kSuccess) {
    MS_LOG(ERROR) << "build model failed!";
    return RET_ERROR;
  }
  auto origin_inputs = origin_model->GetInputs();
  if ((param->dataPreProcessParam.calibrate_size == 0) && (param->mixedBitWeightQuantParam.use_cv_data)) {
    if (ExtendBatchSize(origin_model, &origin_inputs, kNumOfCalibrationImages) != kSuccess) {
      MS_LOG(ERROR) << "Resize session for CV calibration failed!";
      return RET_ERROR;
    }
  }

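  // Fill each input: user-supplied calibration data if configured, otherwise the
  // embedded CV images for suitable 4D float inputs, falling back to random data.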
  for (auto input : origin_inputs) {
    if (param->dataPreProcessParam.calibrate_size > 0) {
      ret = preprocess::PreProcess(param->dataPreProcessParam, input.Name(), 0, &input);
    } else {
      if (param->mixedBitWeightQuantParam.use_cv_data && (input.Shape().size() == DIMENSION_4D) &&
          (input.Shape().at(0) == kNumOfCalibrationImages) &&
          ((input.Shape().at(kNHWC_C) == kOneChannel) || (input.Shape().at(kNHWC_C) == kThreeChannels) ||
           (input.Shape().at(kNHWC_C) == kSixChannels)) &&
          (input.DataType() == DataType::kNumberTypeFloat32)) {
#ifdef MSLITE_DEPS_OPENCV
        ret = GenerateCvData(&input);
      } else {
#endif
        ret = GenerateRandomData(&input);
      }
    }
    if (ret != RET_OK) {
      MS_LOG(ERROR) << input.Name() << ": generate input data failed.";
      return ret;
    }
  }
  auto origin_outputs = origin_model->GetOutputs();
  auto model_status = origin_model->Predict(origin_inputs, &origin_outputs);
  if (model_status != kSuccess) {
    MS_LOG(ERROR) << "Run origin predict failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

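// Coarse-to-fine grid search for the weight-quantization init_scale: a coarse pass
// over [kDefaultRangeStart, kDefaultRangeEnd], an optional extended pass if no
// valid scale is found, then up to kMaxFineSearchIterations refinement passes
// around the accepted candidate.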
int ParameterOptimizer::GridSearchForScale(const FuncGraphPtr &func_graph, const std::shared_ptr<ConverterPara> &param,
                                           double *init_scale) {
  CHECK_NULL_RETURN(param);
  CHECK_NULL_RETURN(init_scale);

  auto origin_model = std::make_shared<mindspore::Model>();
  size_t origin_model_size;
  int ord_param = kDefaultRangeStart;
  auto ret = OriginModelInference(func_graph, param, origin_model, &origin_model_size);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Origin Model Inference failed.";
    return ret;
  }

  float best_compress_ratio = 0;
  bool found_valid_scale = false;
  int steps_per_stage = param->mixedBitWeightQuantParam.max_iterations / (kMaxFineSearchIterations + 1);
  if (steps_per_stage > kMaxStepCoarseSearch) {
    steps_per_stage = kMaxStepCoarseSearch;
  }

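  // Coarse stage: split the default range into steps_per_stage uniform steps.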
  int range_start = kDefaultRangeStart;
  int range_end = kDefaultRangeEnd;
  int step = (range_end - range_start) / steps_per_stage;
  SearchParams search_param = {range_start, range_end, step};

  std::cout << "====== Search for the best scale =======\n";
  ret = WeightQuantModelInference(func_graph, param, origin_model, origin_model_size, &search_param, &ord_param,
                                  &best_compress_ratio, &found_valid_scale);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Weight quant graph inference failed.";
    return ret;
  }
  step = search_param.step;

  if (!found_valid_scale || (ord_param == kDefaultRangeStart && (best_compress_ratio < kCompressToInt8Ratio))) {
    range_start = search_param.range_end;
    range_end = kDefaultExtendFactor * search_param.range_end;
    SearchParams wider_range_param = {range_start, range_end, step};
    std::cout << "=== Couldn't find proper compression, extending the search range ===\n";
    ret = WeightQuantModelInference(func_graph, param, origin_model, origin_model_size, &wider_range_param, &ord_param,
                                    &best_compress_ratio, &found_valid_scale);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Weight quant graph inference failed.";
      return ret;
    }
    if (!found_valid_scale) {
      std::cout << "=== Couldn't find compression that will match similarity constraints. Aborting! ===\n";
      std::cout << "======================= You may try fixed 8bit quantization =======================\n";
      return RET_ERROR;
    }
  }

  if (steps_per_stage > kMaxStepFineSearch) {
    steps_per_stage = kMaxStepFineSearch;
  }
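  // Fine stages: repeatedly narrow the range to the two steps below the accepted
  // scale and rescan with a smaller step until it reaches kFinestGranularity.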
  for (int search_cnt = 0; search_cnt < kMaxFineSearchIterations; search_cnt++) {
    int prev_prev_val = ord_param - kTwo * step;
    range_end = ord_param;
    range_start = std::max(1, prev_prev_val);
    step = (range_end - range_start) / steps_per_stage;
    if (step < static_cast<int>(sqrt(static_cast<float>(range_end - range_start))) / kTwo) {
      step = static_cast<int>(sqrt(static_cast<float>(range_end - range_start))) / kTwo;
    }
    range_start = ord_param - ((ord_param - range_start) / step) * step;  // align search to meet prev scale
    if ((range_start == search_param.range_start) || (range_start == prev_prev_val) || (range_start == 1)) {
      range_start += step;
    }

    search_param.range_start = range_start;
    search_param.range_end = range_end;
    search_param.step = step;
    std::cout << "=== Fine search " << search_cnt << " in range [" << range_start << "," << range_end << "] ===\n";
    ret = WeightQuantModelInference(func_graph, param, origin_model, origin_model_size, &search_param, &ord_param,
                                    &best_compress_ratio, &found_valid_scale);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Weight quant graph inference failed.";
      return ret;
    }
    if (step <= kFinestGranularity) {
      break;
    }
  }
  std::cout << "best compression is " << best_compress_ratio << " at scale " << ord_param << std::endl;
  *init_scale = 1.0 / ord_param;
  return RET_OK;
}
}  // namespace mindspore::lite::quant