• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "tools/converter/quantizer/tensor_compressor.h"
18 #include <memory>
19 #include <numeric>
20 #include <limits>
21 #include <string>
22 #include <vector>
23 #include <functional>
24 #include <set>
25 #include <map>
26 #include <algorithm>
27 
28 namespace mindspore::lite::quant {
WriteBufferWithAlignByte(const std::vector<bool> & bool_vec,int8_t * data)29 void TensorCompressor::WriteBufferWithAlignByte(const std::vector<bool> &bool_vec, int8_t *data) {
30   CHECK_NULL_RETURN_VOID(data);
31   size_t shift = kBitNumPerByte;
32   for (bool bit : bool_vec) {
33     *data |= bit << (shift - 1);
34     if (--shift == 0) {
35       data++;
36       shift = kBitNumPerByte;
37     }
38   }
39 }
40 
DoBitPack(const size_t & bit_num,schema::TensorT * tensor_input)41 int TensorCompressor::DoBitPack(const size_t &bit_num, schema::TensorT *tensor_input) {
42   CHECK_NULL_RETURN(tensor_input);
43   if (bit_num > 0 && bit_num < k8Bit) {
44     std::vector<int8_t> origin_data(tensor_input->data.size());
45     auto status = memcpy_s(origin_data.data(), origin_data.size() * sizeof(int8_t), tensor_input->data.data(),
46                            tensor_input->data.size() * sizeof(uint8_t));
47     if (status != EOK) {
48       MS_LOG(ERROR) << tensor_input->name << " memcpy failed. " << status;
49       return RET_ERROR;
50     }
51     std::vector<uint8_t> pack_data{};
52     BitPack::BitPacking<int8_t, uint8_t>(bit_num, origin_data, &pack_data);
53     tensor_input->data.resize(pack_data.size() * sizeof(uint8_t));
54     status = memcpy_s(tensor_input->data.data(), tensor_input->data.size() * sizeof(uint8_t), pack_data.data(),
55                       pack_data.size() * sizeof(uint8_t));
56     if (status != EOK) {
57       MS_LOG(ERROR) << "memcpy_s failed. " << status;
58       return RET_ERROR;
59     }
60   } else if (bit_num > k8Bit && bit_num < k16Bit) {
61     int shape_size;
62     auto status = GetElementNumFromShape(tensor_input->dims, &shape_size);
63     if (status != RET_OK) {
64       MS_LOG(ERROR) << "Get ElementNum from shape failed.";
65       return status;
66     }
67     std::vector<int16_t> origin_data(shape_size);
68     status = memcpy_s(origin_data.data(), origin_data.size() * sizeof(int16_t), tensor_input->data.data(),
69                       tensor_input->data.size() * sizeof(uint8_t));
70     if (status != EOK) {
71       MS_LOG(ERROR) << "memcpy failed. " << status;
72       return RET_ERROR;
73     }
74     std::vector<uint16_t> pack_data{};
75     BitPack::BitPacking<int16_t, uint16_t>(bit_num, origin_data, &pack_data);
76     tensor_input->data.resize(pack_data.size() * sizeof(uint16_t));
77     status = memcpy_s(tensor_input->data.data(), tensor_input->data.size() * sizeof(uint8_t), pack_data.data(),
78                       pack_data.size() * sizeof(uint16_t));
79     if (status != EOK) {
80       MS_LOG(ERROR) << "memcpy_s failed. " << status;
81       return RET_ERROR;
82     }
83   }
84   return RET_OK;
85 }
86 
SetNewCompressionTensor(const ParameterPtr & weight,const std::vector<bool> & bits,size_t bit_num,const tensor::TensorPtr & tensor_info,TensorCompressionType compression_type)87 int TensorCompressor::SetNewCompressionTensor(const ParameterPtr &weight, const std::vector<bool> &bits, size_t bit_num,
88                                               const tensor::TensorPtr &tensor_info,
89                                               TensorCompressionType compression_type) {
90   // Add New Tensor
91   auto size_in_byte = static_cast<size_t>(ceil(bits.size() / kBitNumPerByte));
92   std::shared_ptr<mindspore::tensor::Tensor> compression_tensor = nullptr;
93   if (bit_num >= k1Bit && bit_num <= k8Bit) {
94     compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt8, tensor_info->shape(),
95                                                                      size_in_byte, compression_type);
96   } else if (bit_num > k8Bit && bit_num <= k16Bit) {
97     compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt16, tensor_info->shape(),
98                                                                      size_in_byte, compression_type);
99   } else {
100     MS_LOG(ERROR) << "bit_num only support 1 ~ 16 bit.";
101     return RET_ERROR;
102   }
103   CHECK_NULL_RETURN(compression_tensor);
104   // set quant param
105   compression_tensor->set_quant_param(tensor_info->quant_params());
106   // update tensor data
107   WriteBufferWithAlignByte(bits, static_cast<int8_t *>(compression_tensor->data().data()));
108   weight->set_default_param(compression_tensor);
109   weight->set_abstract(compression_tensor->ToAbstract());
110   return RET_OK;
111 }
112 
DoBitPack(const ParameterPtr & weight,size_t bit_num)113 int TensorCompressor::DoBitPack(const ParameterPtr &weight, size_t bit_num) {
114   auto tensor_info = weight->default_param()->cast<tensor::TensorPtr>();
115   CHECK_NULL_RETURN(tensor_info);
116   auto elements_num = tensor_info->ElementsNum();
117   std::shared_ptr<mindspore::tensor::Tensor> compression_tensor = nullptr;
118   if (bit_num > 0 && bit_num < k8Bit) {
119     auto quant_data = static_cast<int8_t *>(tensor_info->data().data());
120     std::vector<int8_t> origin_data(quant_data, quant_data + elements_num);
121     std::vector<uint8_t> pack_data{};
122     BitPack::BitPacking<int8_t, uint8_t>(bit_num, origin_data, &pack_data);
123     auto buffer_size = pack_data.size() * sizeof(int8_t);
124     compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt8, tensor_info->shape(), buffer_size,
125                                                                      mindspore::kBitPacking);
126     CHECK_NULL_RETURN(compression_tensor);
127     auto ret = memcpy_s(compression_tensor->data_c(), buffer_size, pack_data.data(), buffer_size);
128     if (ret != EOK) {
129       MS_LOG(ERROR) << weight->name() << " memcpy failed.";
130       return RET_ERROR;
131     }
132   } else if (bit_num > k8Bit && bit_num < k16Bit) {
133     auto quant_data = static_cast<int16_t *>(tensor_info->data().data());
134     std::vector<int16_t> origin_data(quant_data, quant_data + elements_num);
135     std::vector<uint16_t> pack_data{};
136     BitPack::BitPacking<int16_t, uint16_t>(bit_num, origin_data, &pack_data);
137     auto buffer_size = pack_data.size() * sizeof(int16_t);
138     compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt16, tensor_info->shape(),
139                                                                      buffer_size, mindspore::kBitPacking);
140     CHECK_NULL_RETURN(compression_tensor);
141     auto ret = memcpy_s(compression_tensor->data_c(), buffer_size, pack_data.data(), buffer_size);
142     if (ret != EOK) {
143       MS_LOG(ERROR) << weight->name() << " memcpy failed.";
144       return RET_ERROR;
145     }
146   }
147   // set quant param
148   compression_tensor->set_quant_param(tensor_info->quant_params());
149   // update tensor data
150   weight->set_default_param(compression_tensor);
151   weight->set_abstract(compression_tensor->ToAbstract());
152   return RET_OK;
153 }
154 }  // namespace mindspore::lite::quant
155