/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_COMMON_GRAPH_UTIL_H
#define MINDSPORE_LITE_TOOLS_COMMON_GRAPH_UTIL_H

#include <cstdlib>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <memory>
#include <vector>
#include <map>
#include <set>
#include <algorithm>
#include <numeric>
#include <limits>
#include <functional>
#include "include/errorcode.h"
#include "schema/inner/model_generated.h"
#include "src/common/graph_util.h"
#include "ir/anf.h"
#include "ir/func_graph.h"
#include "nnacl/op_base.h"

namespace mindspore {
namespace lite {
using STATUS = int;
enum InsertPlace { kBefore, kAfter };

using NodeIter = std::vector<std::unique_ptr<schema::CNodeT>>::iterator;

using OpDefCopyer = std::function<std::unique_ptr<schema::CNodeT>(schema::CNodeT *)>;

OpDefCopyer GetSimpleOpCopyer();

int SetFuncGraphOutput(const FuncGraphPtr &graph, const std::vector<AnfNodePtr> &outputs);

std::vector<size_t> GetInputNodeIdx(const schema::MetaGraphT &graphT, const size_t &nodeIdx, int inputIndexIdx = -1);

std::vector<size_t> GetInputNodeIdx(const schema::MetaGraphT &graphT, const schema::CNodeT &node,
                                    int inputIndexIdx = -1);

std::vector<size_t> GetOutputNodeIdx(const schema::MetaGraphT &graphT, const size_t &nodeIdx, int outputIndexIdx = -1);

std::vector<size_t> GetOutputNodeIdx(const schema::MetaGraphT &graphT, const schema::CNodeT &node,
                                     int outputIndexIdx = -1);

std::vector<size_t> GetLinkedPreIdx(const schema::MetaGraphT &graphT, const size_t &tensorIdx);

std::vector<size_t> GetLinkedPostIdx(const schema::MetaGraphT &graphT, const size_t &tensorIdx);

void ReplaceOutput(const uint32_t &old_index, const uint32_t &new_index, schema::MetaGraphT *graphT);

STATUS IsolateNode(schema::MetaGraphT *subGraph, schema::CNodeT *node);

STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool removeTensor = true);

STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t subGraphIdx, size_t nodeIdx, bool removeTensor = true);

STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, schema::CNodeT *node, bool removeTensor = true);

STATUS UpdateNodeIndex(schema::CNodeT *node, uint32_t deleteIdx);

STATUS RemoveTensor(schema::MetaGraphT *graphT, std::vector<uint32_t> toDeleteTensorIdxes, bool forceDelete = false);

STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_ptr<schema::TensorT> tensor,
                      InsertPlace place = kBefore);

STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_t inTensorIdx,
                           std::unique_ptr<schema::TensorT> tensor);

int DoBitPack(const int &bit_num, schema::TensorT *tensor_input);

NodeIter InsertNode(schema::MetaGraphT *graphT, uint32_t existNodeIdx, InsertPlace place, size_t inoutIndex,
                    std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                    const OpDefCopyer &opDefCopyer = GetSimpleOpCopyer());

NodeIter InsertNode(schema::MetaGraphT *graphT, NodeIter existNodeIter, InsertPlace place, size_t inoutIndexIdx,
                    std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                    const OpDefCopyer &opDefCopyer = GetSimpleOpCopyer());

NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, size_t inputIndexIdx,
                          std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                          const OpDefCopyer &opDefCopyer);

NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, size_t outputIndexIdx,
                         std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                         const OpDefCopyer &opDefCopyer);

STATUS ValidateFileStr(const std::string &modelFile, const std::string &fileType);

STATUS SetSubgraphTensorIndices(schema::MetaGraphT *meta_graphT);

std::string GetModelName(const std::string &modelFile);

std::vector<int> GetTransposePerm(schema::MetaGraphT *graph, const std::unique_ptr<schema::CNodeT> &cnode);

std::string BoolVectorToString(const std::vector<bool> &bool_vec);

TypeId GetAbstractTensorDtype(const abstract::AbstractTensorPtr &tensor);

TypeId GetParameterDtype(const ParameterPtr &param_node);

STATUS UpdateFuncGraphInputsAndOutputsDtype(const FuncGraphPtr &func_graph);

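// Packs the tensor's quantized values with an indexing (dictionary) encoding. Bit layout of the
// packed buffer, as written below:
//   [unique_value_cnt : bit_num bits]
//   [unique value set : unique_value_cnt * bit_num bits, stored with a +2^(bit_num - 1) offset]
//   [per-element index: elem_cnt * unique_value_bit bits]
// On success the tensor data is replaced in place and weightQunatCompressType is set to INDEXING.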
template <typename T>
bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map,
                      size_t unique_value_bit, size_t unique_value_cnt, size_t pack_repetition_size_in_byte,
                      size_t bit_num, schema::TensorT *tensor) {
  auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));

  std::vector<bool> bits(pack_repetition_size_in_byte * 8);
  size_t index = 0;
  // write unique_value_cnt: bit_num bits, unsigned
  for (size_t i = 0; i < bit_num; i++) {
    bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & (0x1);
  }
  // write the unique value set: each value takes bit_num bits, signed, shifted by 2^(bit_num - 1)
  for (auto unique_value : quant_data_set) {
    for (size_t i = 0; i < bit_num; i++) {
      bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
    }
  }
  // write the indices: each index takes unique_value_bit bits, unsigned
  for (auto quant_value : quant_data) {
    for (size_t i = 0; i < unique_value_bit; i++) {
      bits[index++] = (unique_value_index_map.at(quant_value) >> (unique_value_bit - i - 1)) & (0x1);
    }
  }
  if (index > pack_repetition_size_in_byte * 8) {
    MS_LOG(ERROR) << "unexpected index: " << index << " should not be greater than "
                  << pack_repetition_size_in_byte * 8;
    return false;
  }
  // update tensor data
  auto new_data_str = BoolVectorToString(bits);
  auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size());
  if (ret != EOK) {
    MS_LOG(ERROR) << "memcpy error";
    return false;
  }
  tensor->data.resize(new_data_str.size());

  tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING;
  MS_LOG(DEBUG) << "set WeightQunatCompressType_INDEXING";
  return true;
}

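// Packs the tensor's quantized values with a sparse encoding: elements equal to their channel's
// zero point are dropped, except that an element is force-kept whenever the gap since the last
// kept element would overflow coor_best_bit bits. Bit layout, as written below:
//   [coor_best_bit : 8 bits][nz_cnt : 32 bits][unique_value_cnt : bit_num bits]
//   [unique value set  : unique_value_cnt * bit_num bits, stored with a +2^(bit_num - 1) offset]
//   [kept-value indices: nz_cnt * unique_value_bit bits]
//   [coordinates       : nz_cnt * coor_best_bit bits, each the gap-minus-one to the previous kept element]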
template <typename T>
bool SparsityCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map,
                      size_t unique_value_bit, size_t unique_value_cnt, size_t pack_sparsity_size_in_byte,
                      size_t nz_cnt, size_t coor_best_bit, size_t bit_num, schema::TensorT *tensor) {
  auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  auto &quant_params = tensor->quantParams;
  auto elem_cnt = quant_data.size();
  auto channel_cnt = quant_params.size();
  MS_CHECK_TRUE_MSG(channel_cnt != 0, false, "div zero.");
  auto elem_perchannel = elem_cnt / channel_cnt;

  std::vector<bool> bits(pack_sparsity_size_in_byte * 8);
  size_t index = 0;
  // coor_best_bit: 8 bits
  for (size_t i = 0; i < 8; i++) {
    bits[index++] = (coor_best_bit >> (8 - i - 1)) & 0x1;
  }
  // nz_cnt: 32 bits
  for (size_t i = 0; i < 32; i++) {
    bits[index++] = (nz_cnt >> (32 - i - 1)) & 0x1;
  }
  // unique_value_cnt: bit_num bits
  for (size_t i = 0; i < bit_num; i++) {
    bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & 0x1;
  }
  // unique values: bit_num bits each, shifted by 2^(bit_num - 1)
  for (auto unique_value : quant_data_set) {
    for (size_t i = 0; i < bit_num; i++) {
      bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
    }
  }
  // nz values indexing && get coor
  std::vector<size_t> coors(nz_cnt);
  size_t coors_index = 0;
  // prev_index deliberately wraps to SIZE_MAX so the first gap works out to di - prev_index - 1 == di
  size_t prev_index = -1;
  for (size_t di = 0; di < elem_cnt; di++) {
    auto cur_channel = di / elem_perchannel;
    auto zp = quant_params[cur_channel]->zeroPoint;
    auto nz_value = quant_data[di];
    if (nz_value != zp || (di - prev_index) >= (size_t)(1 << coor_best_bit)) {
      MS_ASSERT(coors_index < nz_cnt);
      coors[coors_index++] = di - prev_index - 1;
      prev_index = di;
      for (size_t i = 0; i < unique_value_bit; i++) {
        bits[index++] = (unique_value_index_map.at(nz_value) >> (unique_value_bit - i - 1)) & (0x1);
      }
    }
  }
  // write coor
  for (auto coor : coors) {
    for (size_t i = 0; i < coor_best_bit; i++) {
      bits[index++] = (coor >> (coor_best_bit - i - 1)) & 0x1;
    }
  }
  if (index > pack_sparsity_size_in_byte * 8) {
    MS_LOG(ERROR) << "unexpected index: " << index << " should not be greater than "
                  << pack_sparsity_size_in_byte * 8;
    return false;
  }
  auto new_data_str = BoolVectorToString(bits);
  auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size());
  if (ret != EOK) {
    MS_LOG(ERROR) << "memcpy error";
    return false;
  }
  tensor->data.resize(new_data_str.size());

  tensor->weightQunatCompressType = schema::WeightQunatCompressType_SPARSE;
  MS_LOG(INFO) << "set WeightQunatCompressType_SPARSE";
  return true;
}

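// Searches coordinate widths of 2 to 10 bits for the one minimizing the sparse payload
// nn_cnt * (bit + unique_value_bit), where nn_cnt counts the elements that must be kept for a
// given width (value differs from the channel zero point, or the gap no longer fits in `bit`
// bits). Writes the winning width to *coor_best_bit and returns the corresponding kept count.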
template <typename T>
size_t CalCoorBestBit(const std::vector<T> &quant_data, size_t elem_cnt,
                      const std::vector<std::unique_ptr<schema::QuantParamT>> &quant_params, int unique_value_bit,
                      size_t *coor_best_bit) {
  size_t best_nn_cnt = 0;
  size_t min_len_in_bit = std::numeric_limits<size_t>::max();
  auto channel_cnt = quant_params.size();
  MS_CHECK_TRUE_MSG(channel_cnt != 0, 0, "div zero.");
  auto elem_perchannel = elem_cnt / channel_cnt;
  for (int bit = 2; bit <= 10; bit++) {
    // count the elements that must be kept when gaps are limited to `bit` bits
    size_t nn_cnt = 0;
    // prev_index deliberately wraps to SIZE_MAX so the first gap works out to i - prev_index == i + 1
    size_t prev_index = -1;
    for (size_t i = 0; i < elem_cnt; i++) {
      auto cur_channel = i / elem_perchannel;
      auto zp = quant_params[cur_channel]->zeroPoint;
      if (quant_data[i] != zp || (i - prev_index) >= (size_t)(1 << bit)) {
        nn_cnt++;
        prev_index = i;
      }
    }

    size_t len_in_bit = nn_cnt * bit + nn_cnt * unique_value_bit;
    if (len_in_bit < min_len_in_bit) {
      min_len_in_bit = len_in_bit;
      *coor_best_bit = bit;
      best_nn_cnt = nn_cnt;
    }
  }
  return best_nn_cnt;
}

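// Entry point for weight compression: estimates the byte sizes of the original bit-packed
// layout, the indexing encoding, and the sparse encoding, then applies whichever is smallest.
// Returns false if the original layout already wins (the tensor is left untouched) or if
// packing fails.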
template <typename T>
bool PackRepetition(size_t bit_num, schema::TensorT *tensor) {
  auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  auto elem_cnt = quant_data.size();
  auto dims = tensor->dims;
  // accumulate in size_t to avoid int overflow on large shapes
  size_t elem_cnt_by_dims = std::accumulate(dims.begin(), dims.end(), static_cast<size_t>(1), std::multiplies<>());
  if (elem_cnt != elem_cnt_by_dims) {
    MS_LOG(ERROR) << "elem_cnt: " << elem_cnt << " not equal to elem_cnt_by_dims: " << elem_cnt_by_dims;
    return false;
  }

  auto &quant_params = tensor->quantParams;

  std::set<T> quant_data_set;
  for (auto quant_value : quant_data) {
    quant_data_set.insert(quant_value);
  }
  std::map<T, size_t> unique_value_index_map;
  size_t index = 0;
  for (auto value : quant_data_set) {
    unique_value_index_map[value] = index++;
  }

  auto unique_value_cnt = quant_data_set.size();
  size_t unique_value_bit = ceil(log2(unique_value_cnt));
  auto pack_repetition_size_in_bit = bit_num + bit_num * unique_value_cnt + unique_value_bit * elem_cnt;
  size_t pack_repetition_size_in_byte = ceil(pack_repetition_size_in_bit / 8.0);
  size_t origin_size_in_byte = ceil(bit_num * elem_cnt / 8.0);

  size_t coor_best_bit = 0;
  auto nz_cnt = CalCoorBestBit<T>(quant_data, elem_cnt, quant_params, unique_value_bit, &coor_best_bit);
  // 1. coor_best_bit 2. nz_cnt 3. quant_data_set size 4. unique_values 5. unique_value indexing 6. nz values coord
  auto pack_sparsity_size_in_bit =
    1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
  size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0);
  MS_LOG(DEBUG) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
                << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte;
  auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte});
  if (min_byte_need == origin_size_in_byte) {
    return false;
  } else if (min_byte_need == pack_repetition_size_in_byte) {
    MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
    return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
                               pack_repetition_size_in_byte, bit_num, tensor);
  } else if (min_byte_need == pack_sparsity_size_in_byte) {
    MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
    return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
                               pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor);
  } else {
    MS_LOG(DEBUG) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
                  << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}";
  }
  return false;
}
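
// A minimal, hypothetical caller sketch (for illustration only), assuming an int8 weight
// tensor quantized to 8 bits:
//   std::unique_ptr<schema::TensorT> weight = /* quantized weight tensor */;
//   if (!PackRepetition<int8_t>(8, weight.get())) {
//     // no encoding beat the original layout; the tensor keeps its original data
//   }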

}  // namespace lite
}  // namespace mindspore

#endif  // MINDSPORE_LITE_TOOLS_COMMON_GRAPH_UTIL_H