/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_
#define MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_

#include <map>
#include <utility>
#include <vector>
#include <queue>
#include <limits>
#include <string>
#include <cmath>
#include "nnacl/matmul_parameter.h"
#include "src/lite_kernel.h"
#include "src/common/utils.h"
#include "src/tensor.h"

static constexpr int kPerTensor = 1;
static constexpr int kBitNum1 = 1;
static constexpr int kBitNum8 = 8;
static constexpr int kBitNum16 = 16;

#ifndef WEIGHT_DECODE_CLIP
namespace mindspore::lite {

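// Expands index-coded tensor data: each entry of `indices` selects a value from
// `unique_values`; every value is range-checked against T before being written
// into `dst_data`. The reconstructed byte count must match `dst_data_size` exactly.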
template <typename T>
STATUS UnIndexTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices, void *dst_data,
                         size_t dst_data_size) {
  std::vector<T> un_indexed_data;
  for (auto index : indices) {
    if (index >= unique_values.size()) {
      MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size();
      return RET_ERROR;
    }
    if (unique_values[index] > std::numeric_limits<T>::max() || unique_values[index] < std::numeric_limits<T>::min()) {
      MS_LOG(ERROR) << "data: " << unique_values[index] << " max: " << std::numeric_limits<T>::max()
                    << " min: " << std::numeric_limits<T>::min();
      return RET_ERROR;
    }
    un_indexed_data.push_back(static_cast<T>(unique_values[index]));
  }
  if (un_indexed_data.size() * sizeof(T) != dst_data_size) {
    MS_LOG(ERROR) << "un-indexed data size: " << un_indexed_data.size() * sizeof(T)
                  << " expected by tensor: " << dst_data_size;
    return RET_ERROR;
  }
  memcpy(dst_data, un_indexed_data.data(), un_indexed_data.size() * sizeof(T));

  return RET_OK;
}

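// Expands sparsity-coded tensor data: `coors` holds, for each non-zero value,
// the run length of zero-point padding that precedes it; the non-zero values
// come from `unique_values` via `indices`. Per-channel zero points are read
// from `quant_params`, and trailing elements are filled with the matching
// channel's zero point until `dst_data_size` bytes are produced.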
template <typename T>
STATUS UnSparseTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices,
                          const std::vector<size_t> &coors,
                          const flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>> *quant_params,
                          size_t elem_cnt, size_t coor_best_bit, void *dst_data, size_t dst_data_size) {
  std::vector<T> un_sparsed_data;
  size_t data_index = 0;
  auto nz_cnt = indices.size();
  MS_ASSERT(nz_cnt == coors.size());
  auto channel_cnt = quant_params->size();
  MS_CHECK_GT(channel_cnt, 0, RET_ERROR);
  auto elem_perchannel = elem_cnt / channel_cnt;
  MS_CHECK_GT(elem_perchannel, 0, RET_ERROR);
  for (size_t i = 0; i < nz_cnt; i++) {
    auto index = indices[i];
    if (index >= unique_values.size()) {
      MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size();
      return RET_ERROR;
    }
    auto nz = unique_values[index];
    if (nz > std::numeric_limits<T>::max() || nz < std::numeric_limits<T>::min()) {
      MS_LOG(ERROR) << "data: " << nz << " max: " << std::numeric_limits<T>::max()
                    << " min: " << std::numeric_limits<T>::min();
      return RET_ERROR;
    }
    auto coor = coors[i];
    for (size_t j = 0; j < coor; j++) {
      auto cur_channel = data_index / elem_perchannel;
      auto zp = quant_params->Get(cur_channel)->zeroPoint();
      un_sparsed_data.push_back(static_cast<T>(zp));
      data_index++;
    }
    un_sparsed_data.push_back(static_cast<T>(nz));
    data_index++;
  }
  if (un_sparsed_data.size() * sizeof(T) > dst_data_size) {
    MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T)
                  << " tensor size: " << dst_data_size;
    return RET_ERROR;
  } else if (un_sparsed_data.size() * sizeof(T) < dst_data_size &&
             (un_sparsed_data.size() + (1 << coor_best_bit) - 1) * sizeof(T) < dst_data_size) {
    MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T) << " tensor size: " << dst_data_size
                  << " coor_best_bit: " << coor_best_bit;
    return RET_ERROR;
  }

  // Pad the tail with per-channel zero points up to the tensor's element count.
  for (; data_index < dst_data_size / sizeof(T); data_index++) {
    auto cur_channel = data_index / elem_perchannel;
    auto zp = quant_params->Get(cur_channel)->zeroPoint();
    un_sparsed_data.push_back(static_cast<T>(zp));
  }

  memcpy(dst_data, un_sparsed_data.data(), un_sparsed_data.size() * sizeof(T));

  return RET_OK;
}

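// Converts a packed byte string into its individual bits for the bit-level
// decoders below.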
std::vector<bool> StringToBitVector(const std::string &str);

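// Reconstructs a sparsity-coded schema tensor into dst_tensor.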
STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor);

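// Reconstructs an index-coded schema tensor into dst_tensor.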
STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor);

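// Decodes compressed and/or quantized weight tensors back into plain tensors at
// model-load time. A minimal usage sketch (assuming the runtime drives it
// roughly like this while building the graph):
//   auto ret = WeightDecoder::UnPack(src_tensor, dst_tensor);
//   if (ret == RET_OK) {
//     ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kNumberTypeFloat32);
//   }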
class WeightDecoder {
 public:
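  // Dequantizes the quantized inputs of a node to dst_data_type, taking the
  // operator's weight layout into account (see IsChannelFirst).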
  static int DequantNode(OpParameter *op_parameter, const std::vector<Tensor *> &in_tensors, TypeId dst_data_type);

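  // Unpacks src_tensor into dst_tensor, dispatching to the matching private
  // decoder below (bit-unpacking, Huffman decoding or decompression).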
  static int UnPack(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);

 private:
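  // Dequantizes a single tensor to dst_data_type.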
  static int DequantTensor(Tensor *tensor, bool channel_first = true, TypeId dst_data_type = kNumberTypeFloat32);

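  // Restores bit-packed weight data in src_tensor to plain integers in dst_tensor.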
  static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);

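  // Decodes Huffman-coded weight data from src_tensor into dst_tensor.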
  static int DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);

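  // Dequantizes raw quantized data of type ST into a freshly malloc'ed buffer of
  // DT, handling per-channel and per-tensor quantization as well as KMeans
  // clusters. The caller owns the returned buffer; returns nullptr on failure.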
  template <typename ST, typename DT = float>
  static DT *DequantData(lite::Tensor *input_tensor, bool channel_first = true) {
    const auto *quant_datas = static_cast<const ST *>(input_tensor->data());
    if (quant_datas == nullptr) {
      MS_LOG(ERROR) << "Get quant tensor failed.";
      return nullptr;
    }
    DT *dequant_datas = static_cast<DT *>(malloc(input_tensor->ElementsNum() * sizeof(DT)));
    if (dequant_datas == nullptr) {
      MS_LOG(ERROR) << "Malloc failed.";
      return nullptr;
    }
    auto quant_param = input_tensor->quant_params();
    if (quant_param.size() != kPerTensor) {
      // Per-channel quantization: one quant param per channel.
      auto shapes = input_tensor->shape();
      auto channels = quant_param.size();
      if (!channel_first) {
        if (static_cast<int>(shapes.size()) != 2 || shapes[1] != static_cast<int>(channels)) {
          MS_LOG(ERROR) << "shape size: " << shapes.size() << " quant params size: " << channels;
          free(dequant_datas);
          return nullptr;
        }
      }
      MS_CHECK_GT(channels, 0, nullptr);
      size_t per_channel_size = input_tensor->ElementsNum() / channels;
      for (size_t i = 0; i < channels; i++) {
        auto param = quant_param.at(i);
        auto scale = param.scale;
        auto zero_point = param.zeroPoint;
        auto var_corr = param.var_corr;
        auto mean_corr = param.mean_corr;
        if (var_corr < 0 || var_corr > 10) {
          MS_LOG(WARNING) << "unexpected var_corr: " << var_corr;
          var_corr = 1;
        }
        for (size_t j = 0; j < per_channel_size; j++) {
          auto index = per_channel_size * i + j;
          if (!channel_first) {
            index = channels * j + i;
          }
#ifdef ENABLE_ARM32
          volatile float dequant_data = (quant_datas[index] - zero_point) * scale * var_corr + mean_corr;
          dequant_datas[index] = static_cast<DT>(dequant_data);
#else
          dequant_datas[index] = static_cast<DT>((quant_datas[index] - zero_point) * scale * var_corr + mean_corr);
#endif
        }
      }
    } else {
      // Per-tensor quantization, optionally with KMeans cluster centers.
      auto quant_clusters = input_tensor->quant_clusters();
      auto param = quant_param.front();
      auto scale = param.scale;
      auto zero_point = param.zeroPoint;
      for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
        if (!quant_clusters.empty()) {
          // Read the index as a wider integer so the range check is meaningful.
          int index = quant_datas[j];
          if (index > INT8_MAX || index < INT8_MIN) {
            MS_LOG(ERROR) << "KMeans param quant is error.";
            free(dequant_datas);
            return nullptr;
          }
          dequant_datas[j] = static_cast<DT>(param.clusters[index - INT8_MIN]);
        } else {
#ifdef ENABLE_ARM32
          volatile float dequant_data = (quant_datas[j] - zero_point) * scale;
          dequant_datas[j] = static_cast<DT>(dequant_data);
#else
          dequant_datas[j] = static_cast<DT>((quant_datas[j] - zero_point) * scale);
#endif
        }
      }
    }
    return dequant_datas;
  }

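  // Whether the weight at `index` is stored channel-first. For MatMulFusion this
  // depends on the transpose flags; all other operators are treated as channel-first.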
  inline static bool IsChannelFirst(int index, const OpParameter *op_parameter) {
    MS_ASSERT(op_parameter != nullptr);
    if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
      const auto *param = reinterpret_cast<const MatMulParameter *>(op_parameter);
      if (index == 0) {
        return !(param->a_transpose_);
      } else if (index == 1) {
        return param->b_transpose_;
      }
    }
    return true;
  }

  static int DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type = kNumberTypeFloat32);

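  // Appends the bits of `packed_data` to `unpack_bit_data`, then drains the
  // queue origin_bit bits at a time, mapping each unsigned value back to the
  // signed range by subtracting 2^(origin_bit - 1).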
  template <typename T1, typename T2>
  static void UnPackData(int origin_bit, const T2 &packed_data, std::queue<bool> *unpack_bit_data, void *unpack_int,
                         size_t *count, bool is_last) {
    T2 uint_result = 0;
    T1 result;
    UnPackFromUintToOrigin<T2>(packed_data, unpack_bit_data);
    while (static_cast<int>(unpack_bit_data->size()) >= origin_bit) {
      for (int k = 0; k < origin_bit; k++) {
        bool bit_tmp = unpack_bit_data->front();
        uint_result = (static_cast<int>(bit_tmp) << static_cast<unsigned int>(k)) + uint_result;
        unpack_bit_data->pop();
      }
      result = uint_result - static_cast<T2>(pow(2, origin_bit - 1));
      (static_cast<T1 *>(unpack_int))[*count] = result;
      uint_result = 0;
      (*count)++;
    }
    size_t remainder = unpack_bit_data->size();
    if (is_last && remainder > 0) {
      // Flush the final, possibly shorter group of bits.
      for (size_t i = 0; i < remainder; i++) {
        bool bit = unpack_bit_data->front();
        uint_result = (static_cast<unsigned int>(bit) << i) + uint_result;
        unpack_bit_data->pop();
      }
      result = static_cast<T1>(uint_result - static_cast<T2>(pow(2, origin_bit - 1)));
      (static_cast<T1 *>(unpack_int))[*count] = result;
    }
  }

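  // Walks the packed weight buffer of input_tensor word by word (T2) and
  // unpacks it into unpack_int_data as origin_bit-wide values of type T1.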
  template <typename T1, typename T2>
  static void UnPackUtil(const schema::Tensor *input_tensor, int origin_bit, void *unpack_int_data) {
    if (input_tensor == nullptr || input_tensor->data() == nullptr) {
      MS_LOG(ERROR) << "tensor data is null";
      return;
    }
    auto weight_data = input_tensor->data()->data();
    int pack_size =
      input_tensor->dataType() == kNumberTypeInt8 ? input_tensor->data()->size() : input_tensor->data()->size() / 2;
    std::queue<bool> unpack_bit_data;
    size_t count = 0;
    for (int i = 0; i < pack_size; ++i) {
      T2 pack_data = (static_cast<const T2 *>(static_cast<const void *>(weight_data)))[i];
      bool is_last = i == pack_size - 1;
      UnPackData<T1, T2>(origin_bit, pack_data, &unpack_bit_data, unpack_int_data, &count, is_last);
    }
  }

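  // Pushes the bits of packed_data onto the queue, least-significant bit first.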
  template <typename T2>
  static void UnPackFromUintToOrigin(const T2 &packed_data, std::queue<bool> *unpack_bit_data) {
    auto n = packed_data;
    size_t bit_count = 0;
    while (bit_count < sizeof(T2) * 8) {
      bool a = n % 2;
      n = n >> 1;
      bit_count++;
      unpack_bit_data->push(a);
    }
  }
};
}  // namespace mindspore::lite
#endif  // WEIGHT_DECODE_CLIP
#endif  // MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_