/**
 * Copyright 2019-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_
#define MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_

#include <memory>
#include <vector>
#include <map>
#include <mutex>
#include <condition_variable>
#include <tuple>
#include <string>
#include <utility>
#include <deque>
#include <algorithm>
#include "include/backend/debug/tensor_data.h"
#ifndef OFFLINE_DBG_MODE
#include "include/backend/debug/data_dump/dump_json_parser.h"
#endif
namespace mindspore {
class TensorLoader {
 public:
  TensorLoader() : mem_total_(0), mem_usage_(0) {}

  ~TensorLoader() { EmptyTensor(); }

  void MoveTensorCurrentToPrev(const std::string &tensor_name) {
    auto handle = tensor_list_map_.extract(tensor_name);
    if (!handle.empty()) {
      MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map";
      prev_tensor_list_map_.insert(std::move(handle));
    }
  }

  void SwapCurrentPrev() { tensor_list_map_.swap(prev_tensor_list_map_); }

  bool TensorExistsInCurrent(const std::string &tensor_name) const {
    return tensor_list_map_.find(tensor_name) != tensor_list_map_.end();
  }

  // Only parameters will return true.
  bool PrevTensorExistsInCurrent(const std::string &tensor_name) const {
    return TensorExistsInCurrent(tensor_name + ":prev");
  }

  void MoveParametersCurrentToPrev() {
    MS_LOG(INFO) << "Moving parameters from current map to previous map";
    auto iter = tensor_list_map_.begin();
    while (iter != tensor_list_map_.end()) {
      auto key = iter->first;
      // Advance the iterator before extracting the node so it stays valid.
      ++iter;
      if (PrevTensorExistsInCurrent(key)) {
        // A ":prev" tensor only exists for a parameter. Move the parameter to the previous map.
        MoveTensorCurrentToPrev(key);
      }
    }
  }

  bool IsPrevTensor(const std::string &tensor_name) const {
    const std::string suffix = ":prev";
    if (tensor_name.length() <= suffix.length()) {
      return false;
    }
    return std::equal(suffix.rbegin(), suffix.rend(), tensor_name.rbegin());
  }
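
  /*
   * Illustration (hypothetical tensor name, for exposition only): for a parameter such as "fc1.weight",
   * the copy kept from the previous step is stored under the key "fc1.weight:prev", so
   *
   *   loader.PrevTensorExistsInCurrent("fc1.weight");   // looks up "fc1.weight:prev"
   *   loader.IsPrevTensor("fc1.weight:prev");           // true
   *   loader.IsPrevTensor("fc1.weight");                // false
   *
   * where `loader` is a TensorLoader instance.
   */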

  /*
   * Feature group: Dump, Online debugger and Offline debugger.
   * Target device group: Ascend, GPU.
   * Runtime category: Old runtime, MindRT.
   * Description: Load a new tensor into tensor_list_map_ (the debugger backend cache). In the offline debugger,
   * ":prev" is appended to the previous tensor's name to avoid a segfault caused by wrongly evicting the tensor
   * when the memory limit is enabled.
   */
  bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) {
    std::lock_guard<std::mutex> lg(lock_);
    auto tensor_name = tensor->GetName();
    if (keep_prev) {
      // Add the previous step's tensor into the current step map with a ":prev" suffix.
      auto handle = prev_tensor_list_map_.extract(tensor_name);
      if (!handle.empty()) {
        handle.key() = tensor_name + ":prev";
        tensor_list_map_.insert(std::move(handle));
      }
    }
    std::string key_name = tensor_name;
#ifdef OFFLINE_DBG_MODE
    std::string output_type = tensor->GetIsOutput() ? "1" : "0";
    key_name += (":" + std::to_string(tensor->GetDeviceId()) + ":" + std::to_string(tensor->GetRootGraphId()) + ":" +
                 output_type + ":" + std::to_string(tensor->GetSlot()));
    if (tensor_list_map_.find(key_name) != tensor_list_map_.end() &&
        tensor->GetIteration() == tensor_list_map_[key_name]->GetPrevIteration()) {
      key_name += ":prev";
    }
#endif
    tensor_list_map_[key_name] = tensor;  // Use operator[] instead of insert so the latest value always wins.
    return true;
  }

  std::vector<std::shared_ptr<TensorData>> GetTensor() {
    std::vector<std::shared_ptr<TensorData>> tensor_list;
    for (auto it = tensor_list_map_.cbegin(); it != tensor_list_map_.cend(); ++it) {
      if (!IsPrevTensor(it->first)) {
        tensor_list.push_back(it->second);
      }
    }
    return tensor_list;
  }

  std::shared_ptr<TensorData> GetTensor(const std::string &tensor_name) const {
    auto iter = tensor_list_map_.find(tensor_name);
    if (iter != tensor_list_map_.end()) {
      return iter->second;
    }
    return nullptr;
  }

  std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
    auto iter = tensor_list_map_.find(tensor_name + ":prev");
    if (iter != tensor_list_map_.end()) {
      return iter->second;
    }
    return nullptr;
  }

  /*
   * Feature group: Online debugger.
   * Target device group: Ascend, GPU.
   * Runtime category: Old runtime, MindRT.
   * Description: Search for and obtain the TensorData for a list of tensors from tensor_list_map_ (the debugger
   * backend cache). A nullptr entry is returned for any tensor that is not found.
   */
  void SearchTensors(const std::vector<std::string> &search_list,
                     std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> *result_list) {
    for (const auto &name : search_list) {
      auto iter = tensor_list_map_.find(name);
      if (iter != tensor_list_map_.cend()) {
        result_list->push_back(std::make_tuple(name, iter->second));
      } else {
        result_list->push_back(std::make_tuple(name, nullptr));
      }
    }
  }

  void EmptyTensor() noexcept {
    std::lock_guard<std::mutex> lg(lock_);
    prev_tensor_list_map_.clear();
    tensor_list_map_.swap(prev_tensor_list_map_);
  }

  void EmptyCurrentTensor() { tensor_list_map_.clear(); }

  bool EnableMemoryControl() const { return mem_total_ > 0; }
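
  /*
   * Usage sketch (illustrative only; `td` is assumed to be a populated std::shared_ptr<TensorData>
   * obtained from the debugger backend):
   *
   *   TensorLoader loader;
   *   loader.LoadNewTensor(td, true);                    // keep_prev: the last step's value stays as "<name>:prev"
   *   auto cur = loader.GetTensor(td->GetName());        // latest value from the cache
   *   auto prev = loader.GetPrevTensor(td->GetName());   // previous step's value, or nullptr if none was kept
   */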

  /*
   * Feature group: Offline debugger.
   * Target device group: Ascend, GPU.
   * Runtime category: Old runtime, MindRT.
   * Description: This function is for the memory control feature only. When the offline debugger is finished with a
   * tensor, the tensor is added to cache_evict_queue_ and becomes an eviction candidate. Once there is no memory left
   * to read in a new tensor, candidates are evicted from the cache.
   */
  void AppendToCacheEvictQueue(const std::string &tensor_name) {
    std::lock_guard<std::mutex> lk(mem_lock_);
    if (std::find(cache_evict_queue_.begin(), cache_evict_queue_.end(), tensor_name) == cache_evict_queue_.end()) {
      cache_evict_queue_.push_back(tensor_name);
      evict_cond.notify_one();
    }
  }

  /*
   * Feature group: Offline debugger.
   * Target device group: Ascend, GPU.
   * Runtime category: Old runtime, MindRT.
   * Description: This function is for the memory control feature only. Check whether the tensor size exceeds the
   * preset limit. If it fits, evict candidate tensors from cache_evict_queue_ until there is room for it.
   */
  bool CheckMemoryAvailable(const std::string &backend_name, const uint64_t data_size) {
    // 1. Check if the tensor can fit within the entire limit. If not, do not attempt any read or eviction and log an
    // error.
    if (data_size > mem_total_) {
      MS_LOG(ERROR) << "Failed to load data of tensor " << backend_name << " because its data size (" << data_size
                    << ") exceeds the maximum memory limit (" << mem_total_ << ").";
      return false;
    }
    // 2. Check if there is enough cache space available for the current tensor. If not, try to evict from the cache.
    bool ret = CheckAndEvictTensorCache(data_size);
    return ret;
  }

  /*
   * Feature group: Offline debugger.
   * Target device group: Ascend, GPU.
   * Runtime category: Old runtime, MindRT.
   * Description: This function is for the memory control feature only. Greedily evict not-in-use tensors from the
   * cache queue. If there is no candidate in the queue, block the thread until a candidate becomes available.
   */
  bool CheckAndEvictTensorCache(const uint64_t data_size) {
    std::string candidate_name;
    uint64_t candidates_size;
    std::unique_lock<std::mutex> lk(mem_lock_);
    while (data_size > mem_total_ - mem_usage_) {
      // Wait until there is a not-in-use candidate that can be evicted from the cache.
      evict_cond.wait(lk, [this] { return !cache_evict_queue_.empty(); });
      candidate_name = cache_evict_queue_.front();
      cache_evict_queue_.pop_front();
      // Evict the candidate tensor.
      auto tensor = GetTensor(candidate_name);
      if (tensor == nullptr) {
        MS_LOG(INFO) << "Tensor: " << candidate_name << " has already been evicted.";
        continue;
      }
      candidates_size = tensor->GetByteSize();
      tensor_list_map_.erase(candidate_name);
      // Guard against unsigned underflow when the bookkeeping is out of sync.
      mem_usage_ = (candidates_size > mem_usage_) ? 0 : mem_usage_ - candidates_size;
      MS_LOG(INFO) << "Evict tensor: " << candidate_name;
    }
    // Reserve space for the current target tensor.
    mem_usage_ = std::min(mem_total_, mem_usage_ + data_size);
    return true;
  }

  void SetMemTotal(uint64_t total_mem_size) { this->mem_total_ = total_mem_size; }
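
  /*
   * Memory-control flow sketch (illustrative only; the actual call sites live in the offline debugger,
   * outside this header):
   *
   *   loader.SetMemTotal(max_bytes);                          // a limit of 0 disables memory control
   *   if (!loader.EnableMemoryControl() ||
   *       loader.CheckMemoryAvailable(backend_name, size)) {
   *     // ... read the tensor and insert it with LoadNewTensor ...
   *   }
   *   loader.AppendToCacheEvictQueue(tensor_name);            // finished with it; it may now be evicted
   */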

#ifndef OFFLINE_DBG_MODE
  /*
   * Feature group: Dump.
   * Target device group: GPU, Ascend.
   * Runtime category: Old runtime, MindRT.
   * Description: Load tensor data from the debugger backend cache (tensor_list_map_) and dump it to a file in npy
   * format. Used for GPU and for Ascend KernelByKernel mode.
   */
  bool DumpTensorToFile(const std::string &filepath, const std::string &tensor_name, size_t slot) {
    if (filepath.empty()) {
      MS_LOG(ERROR) << "Dump file path is null!";
      return false;
    }

    std::string tensor_loader_name = tensor_name + ":" + std::to_string(slot);
    auto iter = tensor_list_map_.find(tensor_loader_name);
    if (iter != tensor_list_map_.cend()) {
      std::shared_ptr<TensorData> node = iter->second;
      std::string path = filepath + '.' + node->GetFormat();
      if (node->GetByteSize() == 0) {
        MS_LOG(INFO) << "The byte size is 0 for tensor: " << tensor_loader_name;
        return false;
      }
      auto type_string = node->GetTypeString();
      if (type_string == "bfloat16") {
        // npy has no native bfloat16 type, so convert the data to float32 before dumping.
        std::shared_ptr<tensor::Tensor> bfloat16_tensor = std::make_shared<tensor::Tensor>(
          TypeId::kNumberTypeBFloat16, node->GetShape(), static_cast<void *>(const_cast<char *>(node->GetDataPtr())),
          node->GetByteSize());
        std::shared_ptr<tensor::Tensor> float32_tensor =
          std::make_shared<tensor::Tensor>(*bfloat16_tensor, TypeId::kNumberTypeFloat32);
        return DumpJsonParser::DumpToFile(path, float32_tensor->data_c(), float32_tensor->Size(),
                                          float32_tensor->shape_c(),
                                          static_cast<TypeId>(float32_tensor->data_type_c()));
      }
      return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), node->GetByteSize(), node->GetShape(),
                                        StringToTypeId(node->GetTypeString()));
    }
    MS_LOG(INFO) << "Tensor name: " << tensor_name << " not found in tensor_list_map_";
    return false;
  }
#endif

 private:
  // In offline debugger mode the map key also encodes ":device_id:root_graph_id:is_output:slot" (see LoadNewTensor).
  std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map_;
  std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map_;
  std::mutex lock_;
  std::mutex mem_lock_;
  uint64_t mem_total_;
  uint64_t mem_usage_;
  std::deque<std::string> cache_evict_queue_;
  std::condition_variable evict_cond;
};
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_