/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_
#define MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_

#include <map>
#include <utility>
#include <vector>
#include <queue>
#include <limits>
#include <string>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include "nnacl/matmul_parameter.h"
#include "src/lite_kernel.h"
#include "src/common/utils.h"
#include "src/tensor.h"

static constexpr int kPerTensor = 1;
static constexpr int kBitNum1 = 1;
static constexpr int kBitNum8 = 8;
static constexpr int kBitNum16 = 16;

#ifndef WEIGHT_DECODE_CLIP
namespace mindspore::lite {

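// Restores index-compressed tensor data: each entry of `indices` selects a value from `unique_values`,
// which is range-checked against T and written into `dst_data`. `dst_data_size` is the expected byte size.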
template <typename T>
STATUS UnIndexTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices, void *dst_data,
                         size_t dst_data_size) {
  std::vector<T> un_indexed_data;
  for (auto index : indices) {
    if (index >= unique_values.size()) {
      MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size();
      return RET_ERROR;
    }
    if (unique_values[index] > std::numeric_limits<T>::max() || unique_values[index] < std::numeric_limits<T>::min()) {
      MS_LOG(ERROR) << "data: " << unique_values[index] << " max: " << std::numeric_limits<T>::max()
                    << " min: " << std::numeric_limits<T>::min();
      return RET_ERROR;
    }
    un_indexed_data.push_back(static_cast<T>(unique_values[index]));
  }
  if (un_indexed_data.size() * sizeof(T) != dst_data_size) {
    MS_LOG(ERROR) << "un-indexed data size: " << un_indexed_data.size() * sizeof(T)
                  << " expected by tensor: " << dst_data_size;
    return RET_ERROR;
  }
  memcpy(dst_data, un_indexed_data.data(), un_indexed_data.size() * sizeof(T));

  return RET_OK;
}

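// Restores sparsity-compressed tensor data. For each non-zero value (looked up via `indices` in
// `unique_values`), `coors[i]` gives the number of zero-point entries to emit before it; the tail of the
// tensor is padded with the per-channel zero points taken from `quant_params`.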
template <typename T>
STATUS UnSparseTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices,
                          const std::vector<size_t> &coors,
                          const flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>> *quant_params,
                          size_t elem_cnt, size_t coor_best_bit, void *dst_data, size_t dst_data_size) {
  std::vector<T> un_sparsed_data;
  size_t data_index = 0;
  auto nz_cnt = indices.size();
  MS_ASSERT(nz_cnt == coors.size());
  auto channel_cnt = quant_params->size();
  MS_CHECK_GT(channel_cnt, 0, RET_ERROR);
  auto elem_perchannel = elem_cnt / channel_cnt;
  MS_CHECK_GT(elem_perchannel, 0, RET_ERROR);
  for (size_t i = 0; i < nz_cnt; i++) {
    auto index = indices[i];
    if (index >= unique_values.size()) {
      MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size();
      return RET_ERROR;
    }
    auto nz = unique_values[index];
    if (nz > std::numeric_limits<T>::max() || nz < std::numeric_limits<T>::min()) {
      MS_LOG(ERROR) << "data: " << nz << " max: " << std::numeric_limits<T>::max()
                    << " min: " << std::numeric_limits<T>::min();
      return RET_ERROR;
    }
    auto coor = coors[i];
    for (size_t j = 0; j < coor; j++) {
      auto cur_channel = data_index / elem_perchannel;
      auto zp = quant_params->Get(cur_channel)->zeroPoint();
      un_sparsed_data.push_back(zp);
      data_index++;
    }
    un_sparsed_data.push_back(static_cast<T>(unique_values[index]));
    data_index++;
  }
  if (un_sparsed_data.size() * sizeof(T) > dst_data_size) {
    MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T)
                  << " tensor size: " << dst_data_size;
    return RET_ERROR;
  } else if (un_sparsed_data.size() * sizeof(T) < dst_data_size &&
             (un_sparsed_data.size() + (1 << coor_best_bit) - 1) * sizeof(T) < dst_data_size) {
    MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T)
                  << " tensor size: " << dst_data_size << " coor_best_bit: " << coor_best_bit;
    return RET_ERROR;
  }

  for (; data_index < dst_data_size / sizeof(T); data_index++) {
    auto cur_channel = data_index / elem_perchannel;
    auto zp = quant_params->Get(cur_channel)->zeroPoint();
    un_sparsed_data.push_back(static_cast<T>(zp));
  }

  memcpy(dst_data, un_sparsed_data.data(), un_sparsed_data.size() * sizeof(T));

  return RET_OK;
}

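// Unpacks a byte string into a vector of bits.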
std::vector<bool> StringToBitVector(const std::string &str);

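// Decompresses a sparsity-encoded schema::Tensor into dst_tensor.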
STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor);

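// Decompresses an index-encoded schema::Tensor into dst_tensor.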
STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor);

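// Decodes packed or compressed weight tensors and dequantizes quantized weights for lite kernels.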
class WeightDecoder {
 public:
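  // Dequantizes the quantized input tensors of the node described by op_parameter to dst_data_type.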
  static int DequantNode(OpParameter *op_parameter, const std::vector<Tensor *> &in_tensors, TypeId dst_data_type);

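  // Unpacks the serialized data of src_tensor into dst_tensor, decompressing it if it is encoded.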
  static int UnPack(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);

 private:
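  // Dequantizes a single tensor to dst_data_type; channel_first gives the layout of the quant channel dim.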
  static int DequantTensor(Tensor *tensor, bool channel_first = true, TypeId dst_data_type = kNumberTypeFloat32);

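  // Unpacks bit-packed integer weight data from src_tensor into dst_tensor.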
  static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);

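  // Decodes Huffman-coded weight data from src_tensor into dst_tensor.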
  static int DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);

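  // Dequantizes the data of input_tensor from the quantized type ST to DT. Handles per-channel quantization
  // (with optional var/mean correction), per-tensor quantization, and KMeans cluster quantization.
  // Returns a malloc'ed buffer owned by the caller, or nullptr on failure.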
  template <typename ST, typename DT = float>
  static DT *DequantData(lite::Tensor *input_tensor, bool channel_first = true) {
    const auto *quant_datas = static_cast<const ST *>(input_tensor->data());
    if (quant_datas == nullptr) {
      MS_LOG(ERROR) << "Get quant tensor failed.";
      return nullptr;
    }
    DT *dequant_datas = static_cast<DT *>(malloc(input_tensor->ElementsNum() * sizeof(DT)));
    if (dequant_datas == nullptr) {
      MS_LOG(ERROR) << "Malloc failed.";
      return nullptr;
    }
    auto quant_param = input_tensor->quant_params();
    if (quant_param.size() != kPerTensor) {
      auto shapes = input_tensor->shape();
      auto channels = quant_param.size();
      if (!channel_first) {
        if (static_cast<int>(shapes.size()) != 2 || shapes[1] != static_cast<int>(channels)) {
          MS_LOG(ERROR) << "shape size: " << shapes.size() << " quant params size: " << channels;
          free(dequant_datas);
          return nullptr;
        }
      }
      MS_CHECK_GT(channels, 0, nullptr);
      size_t per_channel_size = input_tensor->ElementsNum() / channels;
      for (size_t i = 0; i < channels; i++) {
        auto param = quant_param.at(i);
        auto scale = param.scale;
        auto zero_point = param.zeroPoint;
        auto var_corr = param.var_corr;
        auto mean_corr = param.mean_corr;
        if (var_corr < 0 || var_corr > 10) {
          MS_LOG(WARNING) << "unexpected var_corr: " << var_corr;
          var_corr = 1;
        }
        for (size_t j = 0; j < per_channel_size; j++) {
          auto index = per_channel_size * i + j;
          if (!channel_first) {
            index = channels * j + i;
          }
#ifdef ENABLE_ARM32
          volatile float dequant_data = (quant_datas[index] - zero_point) * scale * var_corr + mean_corr;
          dequant_datas[index] = static_cast<DT>(dequant_data);
#else
          dequant_datas[index] = static_cast<DT>((quant_datas[index] - zero_point) * scale * var_corr + mean_corr);
#endif
        }
      }
    } else {
      auto quant_clusters = input_tensor->quant_clusters();
      auto param = quant_param.front();
      auto scale = param.scale;
      auto zero_point = param.zeroPoint;
      for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
        if (!quant_clusters.empty()) {
          if (quant_datas[j] > INT8_MAX || quant_datas[j] < INT8_MIN) {
            MS_LOG(ERROR) << "KMeans param quant is error.";
            free(dequant_datas);
            return nullptr;
          }
          auto index = static_cast<int8_t>(quant_datas[j]);
          dequant_datas[j] = static_cast<DT>(param.clusters[index - INT8_MIN]);
        } else {
#ifdef ENABLE_ARM32
          volatile float dequant_data = (quant_datas[j] - zero_point) * scale;
          dequant_datas[j] = static_cast<DT>(dequant_data);
#else
          dequant_datas[j] = static_cast<DT>((quant_datas[j] - zero_point) * scale);
#endif
        }
      }
    }
    return dequant_datas;
  }

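  // Returns whether the quantization channel dimension of the index-th input comes first. For MatMulFusion
  // the answer depends on the transpose flags; for all other ops it defaults to true.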
  inline static bool IsChannelFirst(int index, const OpParameter *op_parameter) {
    MS_ASSERT(op_parameter != nullptr);
    if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
      const auto *param = reinterpret_cast<const MatMulParameter *>(op_parameter);
      if (index == 0) {
        return !(param->a_transpose_);
      } else if (index == 1) {
        return param->b_transpose_;
      }
    }
    return true;
  }

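  // Dequantizes the given weight tensor to dst_data_type.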
  static int DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type = kNumberTypeFloat32);

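  // Unpacks one packed unit of type T2: its bits are appended to unpack_bit_data, and every origin_bit bits
  // are assembled into a signed value (offset by 2^(origin_bit - 1)) of type T1 stored at unpack_int[*count].
  // When is_last is set, any remaining bits are flushed as a final value.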
  template <typename T1, typename T2>
  static void UnPackData(int origin_bit, const T2 &packed_data, std::queue<bool> *unpack_bit_data, void *unpack_int,
                         size_t *count, bool is_last) {
    T2 uint_result = 0;
    T1 result;
    UnPackFromUintToOrigin<T2>(packed_data, unpack_bit_data);
    while (static_cast<int>(unpack_bit_data->size()) >= origin_bit) {
      for (int k = 0; k < origin_bit; k++) {
        bool bit_tmp = unpack_bit_data->front();
        uint_result = (static_cast<int>(bit_tmp) << static_cast<unsigned int>(k)) + uint_result;
        unpack_bit_data->pop();
      }
      result = uint_result - static_cast<T2>(pow(2, origin_bit - 1));
      (static_cast<T1 *>(unpack_int))[*count] = result;
      uint_result = 0;
      (*count)++;
    }
    size_t remainder = unpack_bit_data->size();
    if (is_last && remainder > 0) {
      for (size_t i = 0; i < remainder; i++) {
        bool bit = unpack_bit_data->front();
        uint_result = (static_cast<unsigned int>(bit) << i) + uint_result;
        unpack_bit_data->pop();
      }
      result = static_cast<T1>(uint_result - static_cast<T2>(pow(2, origin_bit - 1)));
      (static_cast<T1 *>(unpack_int))[*count] = result;
    }
  }

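  // Iterates over the raw data of input_tensor in units of T2 and unpacks each unit into origin_bit-wide
  // values of type T1 written to unpack_int_data.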
  template <typename T1, typename T2>
  static void UnPackUtil(const schema::Tensor *input_tensor, int origin_bit, void *unpack_int_data) {
    if (input_tensor == nullptr || input_tensor->data() == nullptr) {
      MS_LOG(ERROR) << "tensor data is null";
      return;
    }
    auto weight_data = input_tensor->data()->data();
    int pack_size =
      input_tensor->dataType() == kNumberTypeInt8 ? input_tensor->data()->size() : input_tensor->data()->size() / 2;
    std::queue<bool> unpack_bit_data;
    size_t count = 0;
    for (int i = 0; i < pack_size; ++i) {
      T2 pack_data = (static_cast<const T2 *>(static_cast<const void *>(weight_data)))[i];
      bool is_last = i == pack_size - 1;
      UnPackData<T1, T2>(origin_bit, pack_data, &unpack_bit_data, unpack_int_data, &count, is_last);
    }
  }

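  // Pushes the bits of packed_data onto unpack_bit_data, least-significant bit first.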
  template <typename T2>
  static void UnPackFromUintToOrigin(const T2 &packed_data, std::queue<bool> *unpack_bit_data) {
    auto n = packed_data;
    size_t bit_count = 0;
    while (bit_count < sizeof(T2) * 8) {
      bool a = n % 2;
      n = n >> 1;
      bit_count++;
      unpack_bit_data->push(a);
    }
  }
};
}  // namespace mindspore::lite
#endif  // WEIGHT_DECODE_CLIP
#endif  // MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_