1 /**
2 * Copyright 2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "src/litert/pack_weight_manager.h"
17 #include <vector>
18 #include <map>
19 #include <string>
20 #include "src/common/graph_util.h"
21 namespace mindspore::lite {
22 namespace {
23 #ifndef __ANDROID__
24 constexpr size_t kMemAlignSize = 64;
25 #endif
26
27 #ifdef SHARING_MODEL_WEIGHT
ParseNumaId(const std::map<std::string,std::map<std::string,std::string>> * config_info)28 std::string ParseNumaId(const std::map<std::string, std::map<std::string, std::string>> *config_info) {
29 std::string numa_id = "-1";
30 if (config_info == nullptr) {
31 return numa_id;
32 }
33 auto it_id = config_info->find(kInnerModelParallelRunnerSection);
34 if (it_id != config_info->end()) {
35 auto item_numa = it_id->second.find(kInnerNumaIDKey);
36 if (item_numa != it_id->second.end()) {
37 numa_id = it_id->second.at(kInnerNumaIDKey);
38 }
39 }
40 return numa_id;
41 }
42
ParseRunnerId(const std::map<std::string,std::map<std::string,std::string>> * config_info)43 std::string ParseRunnerId(const std::map<std::string, std::map<std::string, std::string>> *config_info) {
44 std::string runner_id = "";
45 if (config_info == nullptr) {
46 return runner_id;
47 }
48 auto it_id = config_info->find(kInnerModelParallelRunnerSection);
49 if (it_id != config_info->end()) {
50 auto item_runner = it_id->second.find(kInnerRunnerIDKey);
51 if (item_runner != it_id->second.end()) {
52 runner_id = it_id->second.at(kInnerRunnerIDKey);
53 }
54 }
55 return runner_id;
56 }
57
ParseCopyBuf(const std::map<std::string,std::map<std::string,std::string>> * config_info)58 bool ParseCopyBuf(const std::map<std::string, std::map<std::string, std::string>> *config_info) {
59 // default copy model buf.
60 bool need_copy = true;
61 if (config_info == nullptr) {
62 return need_copy;
63 }
64 std::string copy_buf = "";
65 auto inner_item = config_info->find(kInnerModelParallelRunnerSection);
66 if (inner_item != config_info->end()) {
67 auto item_copy_buf = inner_item->second.find(kInnerSharingWeightCopyBufKey);
68 if (item_copy_buf != inner_item->second.end()) {
69 copy_buf = inner_item->second.at(kInnerSharingWeightCopyBufKey);
70 }
71 }
72 if (copy_buf == "false") {
73 return false;
74 }
75 return need_copy;
76 }
77 #endif
78 } // namespace
79
GetInstance()80 PackWeightManager *PackWeightManager::GetInstance() {
81 static PackWeightManager instance;
82 return &instance;
83 }
84
GenModelID()85 std::string PackWeightManager::GenModelID() {
86 std::string model_id = "model_" + std::to_string(model_id_);
87 model_ids_.push_back(model_id);
88 model_id_++;
89 MS_LOG(INFO) << "generate model id: " << model_id;
90 return model_id;
91 }
92
IsCopyTensor(int op_type)93 bool PackWeightManager::IsCopyTensor(int op_type) {
94 #ifdef SHARING_MODEL_WEIGHT
95 return true;
96 #endif
97 if (IsPackedOp(op_type)) {
98 return true;
99 }
100 return false;
101 }
102
InitPackWeightManager(const char * model_buf,size_t model_size,std::string * model_id,std::string * runner_id,const std::map<std::string,std::map<std::string,std::string>> * config_info)103 STATUS PackWeightManager::InitPackWeightManager(
104 const char *model_buf, size_t model_size, std::string *model_id, std::string *runner_id,
105 const std::map<std::string, std::map<std::string, std::string>> *config_info) {
106 #ifdef SHARING_MODEL_WEIGHT
107 std::unique_lock<std::mutex> l(manager_mutex_);
108 if (pack_weight_ == nullptr) {
109 pack_weight_ = std::make_shared<PackWeight>();
110 if (pack_weight_ == nullptr) {
111 MS_LOG(ERROR) << "pack_weight_ is nullptr.";
112 return RET_ERROR;
113 }
114 }
115 auto numa_id = std::atoi(ParseNumaId(config_info).c_str());
116 *model_id = GenModelID();
117 std::string id = ParseRunnerId(config_info);
118 *runner_id = id;
119 if (id.empty()) {
120 MS_LOG(INFO) << "model use share pack weight.";
121 id = *model_id;
122 }
123 bool need_copy_buf = ParseCopyBuf(config_info);
124 return pack_weight_->InitPackWeight(static_cast<const void *>(model_buf), model_size, id, numa_id, need_copy_buf);
125 #endif
126 return RET_OK;
127 }
128
GetSharedModelBuf(const char * model_buf,std::string model_id,const std::map<std::string,std::map<std::string,std::string>> * config_info,bool * is_shared)129 char *PackWeightManager::GetSharedModelBuf(const char *model_buf, std::string model_id,
130 const std::map<std::string, std::map<std::string, std::string>> *config_info,
131 bool *is_shared) {
132 #ifdef SHARING_MODEL_WEIGHT
133 std::unique_lock<std::mutex> l(manager_mutex_);
134 std::string id = ParseRunnerId(config_info);
135 int numa_id = std::atoi(ParseNumaId(config_info).c_str());
136 if (id.empty()) {
137 MS_LOG(INFO) << "model use share pack weight.";
138 id = model_id;
139 }
140 auto new_model_buf = pack_weight_->GetSharedModelBuf(id, numa_id);
141 *is_shared = true;
142 return new_model_buf;
143 #endif
144 MS_LOG(INFO) << "model buf not shared.";
145 *is_shared = false;
146 return const_cast<char *>(model_buf);
147 }
148
// Registers every weight-carrying input tensor of the model with the shared
// PackWeight instance so packed buffers can be reused across instances.
// Returns RET_OK when sharing is compiled out or was never initialized.
STATUS PackWeightManager::StoreOriginTensorData(Model *model, std::vector<Tensor *> *all_tensors) {
#ifdef SHARING_MODEL_WEIGHT
  MS_CHECK_TRUE_MSG(model != nullptr, RET_ERROR, "model is nullptr in pack weight manager.");
  if (pack_weight_ == nullptr) {
    // SHARING_MODEL_WEIGHT is compiled in, but InitPackWeightManager was
    // never called (no parallel predict) — nothing to store.
    MS_LOG(DEBUG) << "define SHARING_MODEL_WEIGHT but not use parallel predict.";
    return RET_OK;
  }
  auto lite_model = reinterpret_cast<LiteModel *>(model);
  auto kernel_num = model->graph_.all_nodes_.size();
  for (size_t i = 0; i < kernel_num; i++) {
    auto node = model->graph_.all_nodes_[i];
    for (size_t j = 0; j < node->input_indices_.size(); j++) {
      auto tensor_index = node->input_indices_[j];
      auto src_tensor = lite_model->GetSchemaTensor(tensor_index);
      // Skip inputs without schema-backed data (e.g. activations or empty
      // tensors) — only real weight buffers are shared.
      if (src_tensor == nullptr || src_tensor->handler() == nullptr || src_tensor->data() == nullptr ||
          src_tensor->length() == 0) {
        continue;
      }
      if (all_tensors->at(tensor_index)->own_data()) {
        // The tensor owns a private copy: swap it for the shared buffer
        // BEFORE registering, so the stored pointer is the shared one.
        auto status = pack_weight_->ReplaceOriginTensorData(lite_model->buf, all_tensors, tensor_index);
        if (status != RET_OK) {
          MS_LOG(DEBUG) << "ReplaceOriginTensorData failed.";
          return RET_ERROR;
        }
      }
      auto status = pack_weight_->StoreOriginTensorData(lite_model->buf, all_tensors->at(tensor_index)->data());
      if (status != RET_OK) {
        MS_LOG(DEBUG) << "data not packed.";
        return RET_ERROR;
      }
    }
  }
#endif
  return RET_OK;
}
184
ReplaceFp16Data(void * origin_fp16_data,size_t size,bool * replace)185 void *PackWeightManager::ReplaceFp16Data(void *origin_fp16_data, size_t size, bool *replace) {
186 #ifdef SHARING_MODEL_WEIGHT
187 *replace = true;
188 return pack_weight_->ReplaceFp16Data(origin_fp16_data, size);
189 #endif
190 *replace = false;
191 return nullptr;
192 }
193
MallocData(size_t size)194 void *PackWeightManager::MallocData(size_t size) {
195 if (size > MAX_MALLOC_SIZE || size == 0) {
196 MS_LOG(ERROR) << "malloc size is wrong.";
197 return nullptr;
198 }
199 void *data = nullptr;
200 #ifdef _WIN32
201 size_t round_size = (size + kMemAlignSize - 1) & (~(kMemAlignSize - 1));
202 data = _aligned_malloc(round_size, kMemAlignSize);
203 if (data == nullptr) {
204 MS_LOG(ERROR) << "malloc failed.";
205 return nullptr;
206 }
207 #elif defined(__ANDROID__)
208 data = malloc(size);
209 if (data == nullptr) {
210 MS_LOG(ERROR) << "malloc failed.";
211 return nullptr;
212 }
213 #else
214 size_t round_size = (size + kMemAlignSize - 1) & (~(kMemAlignSize - 1));
215 auto ret = posix_memalign(&data, kMemAlignSize, round_size);
216 if (ret != 0) {
217 MS_LOG(ERROR) << "posix_memalign failed.";
218 return nullptr;
219 }
220 #endif
221 return data;
222 }
223
GetPackData(const void * tensor_data,const size_t size,bool * is_packed)224 void *PackWeightManager::GetPackData(const void *tensor_data, const size_t size, bool *is_packed) {
225 #ifdef SHARING_MODEL_WEIGHT
226 if (pack_weight_ == nullptr) {
227 void *data = MallocData(size);
228 *is_packed = false;
229 return data;
230 }
231 return pack_weight_->GetPackData(tensor_data, size, is_packed);
232 #endif
233 void *data = MallocData(size);
234 *is_packed = false;
235 return data;
236 }
237
FreeData(void * tensor_data)238 void PackWeightManager::FreeData(void *tensor_data) {
239 if (tensor_data != nullptr) {
240 #ifdef _WIN32
241 _aligned_free(tensor_data);
242 #else
243 free(tensor_data);
244 #endif
245 tensor_data = nullptr;
246 }
247 }
248
Free(void * tensor_data)249 void PackWeightManager::Free(void *tensor_data) {
250 #ifdef SHARING_MODEL_WEIGHT
251 if (pack_weight_ == nullptr) {
252 FreeData(tensor_data);
253 }
254 return;
255 #endif
256 FreeData(tensor_data);
257 }
258
FreePackWeight(std::string runner_id,std::string model_id)259 void PackWeightManager::FreePackWeight(std::string runner_id, std::string model_id) {
260 #ifdef SHARING_MODEL_WEIGHT
261 std::unique_lock<std::mutex> l(manager_mutex_);
262 if (pack_weight_ != nullptr) {
263 if (!runner_id.empty()) {
264 MS_LOG(INFO) << "free pack weight of runner id: " << runner_id;
265 pack_weight_->FreePackWeight(runner_id);
266 }
267 } else {
268 MS_LOG(INFO) << "pack_weight_ is nullptr.";
269 return;
270 }
271 if (model_id.empty()) {
272 MS_LOG(INFO) << "model id is empty.";
273 return;
274 }
275 pack_weight_->FreePackWeight(model_id);
276 auto it = find(model_ids_.begin(), model_ids_.end(), model_id);
277 if (it != model_ids_.end()) {
278 model_ids_.erase(it);
279 }
280 #endif
281 return;
282 }
283 } // namespace mindspore::lite
284