1 /**
2 * Copyright 2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "tools/converter/quantizer/tensor_compressor.h"
18 #include <memory>
19 #include <numeric>
20 #include <limits>
21 #include <string>
22 #include <vector>
23 #include <functional>
24 #include <set>
25 #include <map>
26 #include <algorithm>
27
28 namespace mindspore::lite::quant {
WriteBufferWithAlignByte(const std::vector<bool> & bool_vec,int8_t * data)29 void TensorCompressor::WriteBufferWithAlignByte(const std::vector<bool> &bool_vec, int8_t *data) {
30 CHECK_NULL_RETURN_VOID(data);
31 size_t shift = kBitNumPerByte;
32 for (bool bit : bool_vec) {
33 *data |= bit << (shift - 1);
34 if (--shift == 0) {
35 data++;
36 shift = kBitNumPerByte;
37 }
38 }
39 }
40
DoBitPack(const size_t & bit_num,schema::TensorT * tensor_input)41 int TensorCompressor::DoBitPack(const size_t &bit_num, schema::TensorT *tensor_input) {
42 CHECK_NULL_RETURN(tensor_input);
43 if (bit_num > 0 && bit_num < k8Bit) {
44 std::vector<int8_t> origin_data(tensor_input->data.size());
45 auto status = memcpy_s(origin_data.data(), origin_data.size() * sizeof(int8_t), tensor_input->data.data(),
46 tensor_input->data.size() * sizeof(uint8_t));
47 if (status != EOK) {
48 MS_LOG(ERROR) << tensor_input->name << " memcpy failed. " << status;
49 return RET_ERROR;
50 }
51 std::vector<uint8_t> pack_data{};
52 BitPack::BitPacking<int8_t, uint8_t>(bit_num, origin_data, &pack_data);
53 tensor_input->data.resize(pack_data.size() * sizeof(uint8_t));
54 status = memcpy_s(tensor_input->data.data(), tensor_input->data.size() * sizeof(uint8_t), pack_data.data(),
55 pack_data.size() * sizeof(uint8_t));
56 if (status != EOK) {
57 MS_LOG(ERROR) << "memcpy_s failed. " << status;
58 return RET_ERROR;
59 }
60 } else if (bit_num > k8Bit && bit_num < k16Bit) {
61 int shape_size;
62 auto status = GetElementNumFromShape(tensor_input->dims, &shape_size);
63 if (status != RET_OK) {
64 MS_LOG(ERROR) << "Get ElementNum from shape failed.";
65 return status;
66 }
67 std::vector<int16_t> origin_data(shape_size);
68 status = memcpy_s(origin_data.data(), origin_data.size() * sizeof(int16_t), tensor_input->data.data(),
69 tensor_input->data.size() * sizeof(uint8_t));
70 if (status != EOK) {
71 MS_LOG(ERROR) << "memcpy failed. " << status;
72 return RET_ERROR;
73 }
74 std::vector<uint16_t> pack_data{};
75 BitPack::BitPacking<int16_t, uint16_t>(bit_num, origin_data, &pack_data);
76 tensor_input->data.resize(pack_data.size() * sizeof(uint16_t));
77 status = memcpy_s(tensor_input->data.data(), tensor_input->data.size() * sizeof(uint8_t), pack_data.data(),
78 pack_data.size() * sizeof(uint16_t));
79 if (status != EOK) {
80 MS_LOG(ERROR) << "memcpy_s failed. " << status;
81 return RET_ERROR;
82 }
83 }
84 return RET_OK;
85 }
86
SetNewCompressionTensor(const ParameterPtr & weight,const std::vector<bool> & bits,size_t bit_num,const tensor::TensorPtr & tensor_info,TensorCompressionType compression_type)87 int TensorCompressor::SetNewCompressionTensor(const ParameterPtr &weight, const std::vector<bool> &bits, size_t bit_num,
88 const tensor::TensorPtr &tensor_info,
89 TensorCompressionType compression_type) {
90 // Add New Tensor
91 auto size_in_byte = static_cast<size_t>(ceil(bits.size() / kBitNumPerByte));
92 std::shared_ptr<mindspore::tensor::Tensor> compression_tensor = nullptr;
93 if (bit_num >= k1Bit && bit_num <= k8Bit) {
94 compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt8, tensor_info->shape(),
95 size_in_byte, compression_type);
96 } else if (bit_num > k8Bit && bit_num <= k16Bit) {
97 compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt16, tensor_info->shape(),
98 size_in_byte, compression_type);
99 } else {
100 MS_LOG(ERROR) << "bit_num only support 1 ~ 16 bit.";
101 return RET_ERROR;
102 }
103 CHECK_NULL_RETURN(compression_tensor);
104 // set quant param
105 compression_tensor->set_quant_param(tensor_info->quant_params());
106 // update tensor data
107 WriteBufferWithAlignByte(bits, static_cast<int8_t *>(compression_tensor->data().data()));
108 weight->set_default_param(compression_tensor);
109 weight->set_abstract(compression_tensor->ToAbstract());
110 return RET_OK;
111 }
112
DoBitPack(const ParameterPtr & weight,size_t bit_num)113 int TensorCompressor::DoBitPack(const ParameterPtr &weight, size_t bit_num) {
114 auto tensor_info = weight->default_param()->cast<tensor::TensorPtr>();
115 CHECK_NULL_RETURN(tensor_info);
116 auto elements_num = tensor_info->ElementsNum();
117 std::shared_ptr<mindspore::tensor::Tensor> compression_tensor = nullptr;
118 if (bit_num > 0 && bit_num < k8Bit) {
119 auto quant_data = static_cast<int8_t *>(tensor_info->data().data());
120 std::vector<int8_t> origin_data(quant_data, quant_data + elements_num);
121 std::vector<uint8_t> pack_data{};
122 BitPack::BitPacking<int8_t, uint8_t>(bit_num, origin_data, &pack_data);
123 auto buffer_size = pack_data.size() * sizeof(int8_t);
124 compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt8, tensor_info->shape(), buffer_size,
125 mindspore::kBitPacking);
126 CHECK_NULL_RETURN(compression_tensor);
127 auto ret = memcpy_s(compression_tensor->data_c(), buffer_size, pack_data.data(), buffer_size);
128 if (ret != EOK) {
129 MS_LOG(ERROR) << weight->name() << " memcpy failed.";
130 return RET_ERROR;
131 }
132 } else if (bit_num > k8Bit && bit_num < k16Bit) {
133 auto quant_data = static_cast<int16_t *>(tensor_info->data().data());
134 std::vector<int16_t> origin_data(quant_data, quant_data + elements_num);
135 std::vector<uint16_t> pack_data{};
136 BitPack::BitPacking<int16_t, uint16_t>(bit_num, origin_data, &pack_data);
137 auto buffer_size = pack_data.size() * sizeof(int16_t);
138 compression_tensor = std::make_shared<mindspore::tensor::Tensor>(kNumberTypeInt16, tensor_info->shape(),
139 buffer_size, mindspore::kBitPacking);
140 CHECK_NULL_RETURN(compression_tensor);
141 auto ret = memcpy_s(compression_tensor->data_c(), buffer_size, pack_data.data(), buffer_size);
142 if (ret != EOK) {
143 MS_LOG(ERROR) << weight->name() << " memcpy failed.";
144 return RET_ERROR;
145 }
146 }
147 // set quant param
148 compression_tensor->set_quant_param(tensor_info->quant_params());
149 // update tensor data
150 weight->set_default_param(compression_tensor);
151 weight->set_abstract(compression_tensor->ToAbstract());
152 return RET_OK;
153 }
154 } // namespace mindspore::lite::quant
155