• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_TENSOR_COMPRESSOR_H_
18 #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_TENSOR_COMPRESSOR_H_
19 
20 #include <memory>
21 #include <string>
22 #include <vector>
23 #include <set>
24 #include <map>
25 #include <numeric>
26 #include <limits>
27 #include <functional>
28 #include <algorithm>
29 #include "include/errorcode.h"
30 #include "ir/anf.h"
31 #include "ir/tensor.h"
32 #include "src/common/log_adapter.h"
33 #include "src/common/log_util.h"
34 #include "schema/inner/model_generated.h"
35 #include "tools/converter/quantizer/bitpacking.h"
36 #include "tools/converter/quantizer/quant_params.h"
37 #include "tools/converter/quantizer/quantize_util.h"
38 
39 using mindspore::ParameterPtr;
40 namespace mindspore::lite::quant {
41 class TensorCompressor {
42  public:
43   template <typename T>
DoSparseCompress(const ParameterPtr & weight,size_t bit_num,const std::vector<schema::QuantParamT> & quant_params)44   int DoSparseCompress(const ParameterPtr &weight, size_t bit_num,
45                        const std::vector<schema::QuantParamT> &quant_params) {
46     auto tensor_info = weight->default_param()->cast<tensor::TensorPtr>();
47     CHECK_NULL_RETURN(tensor_info);
48     if (tensor_info->compression_type() != mindspore::kNoCompression) {
49       MS_LOG(INFO) << weight->fullname_with_scope() << " is shared weight.";
50       return RET_OK;
51     }
52     auto max_size = tensor_info->Size();
53     auto quant_data_array = static_cast<T *>(tensor_info->data().data());
54 
55     std::vector<T> quant_data(quant_data_array, quant_data_array + max_size / sizeof(T));
56     auto elem_cnt = quant_data.size();
57     auto dims = tensor_info->shape_c();
58     size_t elem_cnt_by_dims = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<>());
59     if (elem_cnt != elem_cnt_by_dims) {
60       MS_LOG(ERROR) << weight->fullname_with_scope() << " elem_cnt: " << elem_cnt
61                     << " not equal elem_cnt_by_dims: " << elem_cnt_by_dims;
62       return RET_ERROR;
63     }
64 
65     std::set<T> quant_data_set;
66     for (auto quant_value : quant_data) {
67       quant_data_set.insert(quant_value);
68     }
69     std::map<T, size_t> unique_value_index_map;
70     auto index = 0;
71     for (auto iter = quant_data_set.cbegin(); iter != quant_data_set.cend(); ++iter) {
72       unique_value_index_map[*iter] = index++;
73     }
74 
75     auto unique_value_cnt = quant_data_set.size();
76     size_t unique_value_bit = ceil(log2(unique_value_cnt));
77     auto pack_repetition_size_in_bit = bit_num + bit_num * unique_value_cnt + unique_value_bit * elem_cnt;
78     size_t pack_repetition_size_in_byte = ceil(1.0 * pack_repetition_size_in_bit / k8Bit);
79     size_t origin_size_in_byte = ceil(1.0 * bit_num * elem_cnt / k8Bit);
80 
81     size_t coor_best_bit = 0;
82     auto nz_cnt = CalCoorBestBit<T>(quant_data, elem_cnt, quant_params, unique_value_bit, &coor_best_bit);
83     // 1. coor_best_bit 2. nz_cnt 3. quant_data_set size 4. unique_values 5. unique_value indexing 6. nz values coord
84     const auto pack_sparsity_size_in_bit =
85       1 * k8Bit + 4 * k8Bit + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
86     size_t pack_sparsity_size_in_byte = ceil(1.0 * pack_sparsity_size_in_bit / k8Bit);
87     MS_LOG(DEBUG) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
88                   << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte;
89     auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte});
90     if (min_byte_need == origin_size_in_byte) {
91       return RET_NO_CHANGE;
92     } else if (min_byte_need == pack_repetition_size_in_byte) {
93       MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
94       return IndexingCompress<T>(weight, quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
95                                  pack_repetition_size_in_byte, bit_num);
96     } else if (min_byte_need == pack_sparsity_size_in_byte) {
97       MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
98       return SparsityCompress<T>(weight, quant_params, quant_data_set, unique_value_index_map, unique_value_bit,
99                                  unique_value_cnt, pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num);
100     } else {
101       MS_LOG(DEBUG) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
102                     << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}";
103     }
104     return RET_NO_CHANGE;
105   }
106   int DoBitPack(const size_t &bit_num, schema::TensorT *tensor_input);  // Overload taking a schema::TensorT; declared here only, no definition in this header.
107 
108   int DoBitPack(const ParameterPtr &weight, size_t bit_num);  // Overload taking a ParameterPtr; declared here only. NOTE(review): presumably packs the weight data to bit_num bits per value - confirm in the definition.
109 
110  private:
111   template <typename T>
IndexingCompress(const ParameterPtr & weight,const std::set<T> & quant_data_set,const std::map<T,size_t> & unique_value_index_map,size_t unique_value_bit,size_t unique_value_cnt,size_t pack_repetition_size_in_byte,size_t bit_num)112   int IndexingCompress(const ParameterPtr &weight, const std::set<T> &quant_data_set,
113                        const std::map<T, size_t> &unique_value_index_map, size_t unique_value_bit,
114                        size_t unique_value_cnt, size_t pack_repetition_size_in_byte, size_t bit_num) {
115     std::vector<bool> bits(pack_repetition_size_in_byte * k8Bit);
116     size_t index = 0;
117     // write unique_value_cnt: bit_num bit for unsigned
118     for (size_t i = 0; i < bit_num; i++) {
119       bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & (0x1);
120     }
121     // write the unique value set: each value has bit_num bit signed
122     for (auto iter = quant_data_set.cbegin(); iter != quant_data_set.cend(); ++iter) {
123       for (size_t i = 0; i < bit_num; i++) {
124         bits[index++] = ((*iter + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
125       }
126     }
127 
128     auto tensor_info = weight->default_param()->cast<tensor::TensorPtr>();
129     CHECK_NULL_RETURN(tensor_info);
130     auto max_size = tensor_info->ElementsNum();
131     auto quant_data = static_cast<T *>(tensor_info->data().data());
132     // write the index: each index has unique_value_bit unsigned
133     for (int i = 0; i < max_size; i++) {
134       auto quant_value = quant_data[i];
135       for (size_t j = 0; j < unique_value_bit; j++) {
136         bits[index++] = (unique_value_index_map.at(quant_value) >> (unique_value_bit - j - 1)) & (0x1);
137       }
138     }
139     if (index > pack_repetition_size_in_byte * k8Bit) {
140       MS_LOG(ERROR) << "unexpected index: " << index << " should not be greater than "
141                     << pack_repetition_size_in_byte * k8Bit;
142       return RET_ERROR;
143     }
144 
145     auto ret = SetNewCompressionTensor(weight, bits, bit_num, tensor_info, mindspore::kIndexing);
146     if (ret != RET_OK) {
147       MS_LOG(ERROR) << "Add New tensor failed.";
148       return RET_ERROR;
149     }
150     return RET_OK;
151   }
152 
153   template <typename T>
SparsityCompress(const ParameterPtr & weight,const std::vector<schema::QuantParamT> & quant_params,const std::set<T> & quant_data_set,const std::map<T,size_t> & unique_value_index_map,size_t unique_value_bit,size_t unique_value_cnt,size_t pack_sparsity_size_in_byte,size_t nz_cnt,size_t coor_best_bit,size_t bit_num)154   int SparsityCompress(const ParameterPtr &weight, const std::vector<schema::QuantParamT> &quant_params,
155                        const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map,
156                        size_t unique_value_bit, size_t unique_value_cnt, size_t pack_sparsity_size_in_byte,
157                        size_t nz_cnt, size_t coor_best_bit, size_t bit_num) {
158     auto tensor_info = weight->default_param()->cast<tensor::TensorPtr>();
159     CHECK_NULL_RETURN(tensor_info);
160     auto quant_data = static_cast<T *>(tensor_info->data().data());
161     int elem_cnt = tensor_info->DataSize();
162     auto channel_cnt = quant_params.size();
163     if (channel_cnt == 0) {
164       MS_LOG(ERROR) << "quant_params is empty.";
165       return RET_ERROR;
166     }
167     auto elem_perchannel = elem_cnt / channel_cnt;
168 
169     std::vector<bool> bits(pack_sparsity_size_in_byte * k8Bit);
170     size_t index = 0;
171     // coor_best_bit
172     for (size_t i = 0; i < k8Bit; i++) {
173       bits[index++] = (coor_best_bit >> (k8Bit - i - 1)) & 0x1;
174     }
175     // nz_cnt
176     for (size_t i = 0; i < k32Bit; i++) {
177       bits[index++] = (nz_cnt >> (k32Bit - i - 1)) & 0x1;
178     }
179     // unique_value cnt
180     for (size_t i = 0; i < bit_num; i++) {
181       bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & 0x1;
182     }
183     // unique_values
184     for (auto unique_value : quant_data_set) {
185       for (size_t i = 0; i < bit_num; i++) {
186         bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
187       }
188     }
189     // nz values indexing && get coor
190     std::vector<size_t> coors(nz_cnt);
191     int coors_index = 0;
192     int prev_index = -1;
193     for (int di = 0; di < elem_cnt; di++) {
194       auto cur_channel = di / elem_perchannel;
195       auto zp = quant_params[cur_channel].zeroPoint;
196       auto nz_value = quant_data[di];
197       if (nz_value != zp || static_cast<size_t>(di - prev_index) >= (1u << coor_best_bit)) {
198         MS_ASSERT(coors_index < nz_cnt);
199         coors[coors_index++] = di - prev_index - 1;
200         prev_index = di;
201         for (size_t i = 0; i < unique_value_bit; i++) {
202           bits[index++] = (unique_value_index_map.at(nz_value) >> (unique_value_bit - i - 1)) & (0x1);
203         }
204       }
205     }
206     // write coor
207     for (auto coor : coors) {
208       for (size_t i = 0; i < coor_best_bit; i++) {
209         bits[index++] = (coor >> (coor_best_bit - i - 1)) & 0x1;
210       }
211     }
212     if (index > pack_sparsity_size_in_byte * k8Bit) {
213       MS_LOG(ERROR) << "unexpected index: " << index << " should not be greater than "
214                     << pack_sparsity_size_in_byte * k8Bit;
215       return RET_ERROR;
216     }
217 
218     auto ret = SetNewCompressionTensor(weight, bits, bit_num, tensor_info, mindspore::kSparse);
219     if (ret != RET_OK) {
220       MS_LOG(ERROR) << "Add New tensor failed.";
221       return RET_ERROR;
222     }
223     return RET_OK;
224   }
225 
226   template <typename T>
CalCoorBestBit(const std::vector<T> & quant_data,size_t elem_cnt,const std::vector<schema::QuantParamT> & quant_params,int unique_value_bit,size_t * coor_best_bit)227   size_t CalCoorBestBit(const std::vector<T> &quant_data, size_t elem_cnt,
228                         const std::vector<schema::QuantParamT> &quant_params, int unique_value_bit,
229                         size_t *coor_best_bit) {
230     MS_ASSERT(!quant_params.empty());
231     size_t best_nn_cnt = 0;
232     size_t min_len_in_bit = std::numeric_limits<size_t>::max();
233     for (size_t bit = k2Bit; bit <= k10Bit; bit++) {
234       // search
235       int nn_cnt = 0;
236       int prev_index = -1;
237       auto channel_cnt = quant_params.size();
238       MS_ASSERT(channel_cnt > 0);
239       auto elem_perchannel = elem_cnt / channel_cnt;
240       for (size_t i = 0; i < elem_cnt; i++) {
241         auto cur_channel = i / elem_perchannel;
242         auto zp = quant_params[cur_channel].zeroPoint;
243         if (quant_data[i] != zp || (static_cast<int>(i) - prev_index) >= ((1 << bit))) {
244           nn_cnt++;
245           prev_index = i;
246         }
247       }
248 
249       size_t len_in_bit = nn_cnt * bit + nn_cnt * unique_value_bit;
250       if (len_in_bit < min_len_in_bit) {
251         min_len_in_bit = len_in_bit;
252         *coor_best_bit = bit;
253         best_nn_cnt = nn_cnt;
254       }
255     }
256     return best_nn_cnt;
257   }
258 
259   void WriteBufferWithAlignByte(const std::vector<bool> &bool_vec, int8_t *data);  // Declared here only. NOTE(review): name suggests it serialises bool_vec into the byte buffer `data` - confirm in the definition.
260 
261   int SetNewCompressionTensor(const ParameterPtr &weight, const std::vector<bool> &bits, size_t bit_num,
262                               const tensor::TensorPtr &tensor_info, TensorCompressionType compression_type);  // Declared here only. IndexingCompress/SparsityCompress pass the packed bit stream and the compression type and treat any result other than RET_OK as failure.
263 };
264 }  // namespace mindspore::lite::quant
265 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_TENSOR_COMPRESSOR_H_
266