1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MIINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_DATA_H_
18 #define MIINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_DATA_H_
19
20 #include <map>
21 #include <memory>
22 #include <vector>
23 #include <string>
24 #include <thread>
25 #include <utility>
26
27 #include "distributed/persistent/storage/local_file.h"
28 #include "utils/log_adapter.h"
29
30 namespace mindspore {
31 namespace distributed {
32 namespace persistent {
// Data keeps the element buffer of a tensor (and, optionally, its shape) in memory through shared pointers.
// It is the base class of PersistentData, which adds persistence and disaster recovery on top of it.
template <typename T>
class Data {
 public:
  // Stores the given buffer and shape pointers; no elements are copied.
  // `shape` may be omitted, in which case shape() returns nullptr.
  explicit Data(const std::shared_ptr<std::vector<T>> &data, const std::shared_ptr<std::vector<int>> &shape = nullptr)
      : data_{data}, shape_{shape} {}

  virtual ~Data() = default;

  // Returns a raw pointer to the first element of the underlying buffer.
  // The buffer pointer is checked with MS_EXCEPTION_IF_NULL before it is used.
  T *data() const {
    MS_EXCEPTION_IF_NULL(data_);
    std::vector<T> &buffer = *data_;
    return buffer.data();
  }

  // Returns the shared pointer that owns the underlying buffer (may be nullptr, no check is performed).
  std::shared_ptr<std::vector<T>> MutableData() const { return data_; }

  // Returns the number of elements (not bytes) held by the underlying buffer.
  // The buffer pointer is checked with MS_EXCEPTION_IF_NULL before it is used.
  size_t size() const {
    MS_EXCEPTION_IF_NULL(data_);
    const std::vector<T> &buffer = *data_;
    return buffer.size();
  }

  // Returns the shared pointer to the dimension information; nullptr when no shape was supplied.
  std::shared_ptr<std::vector<int>> shape() const { return shape_; }

 protected:
  // Contiguous element buffer of the tensor.
  std::shared_ptr<std::vector<T>> data_;

  // Dimension information of the tensor stored in data_; optional, so it can be nullptr.
  std::shared_ptr<std::vector<int>> shape_;
};
68
69 // Implementation of the class Data to complete the function of persistence and disaster tolerance.
70 template <typename T>
71 class PersistentData : public Data<T> {
72 public:
73 explicit PersistentData(const std::shared_ptr<std::vector<T>> &data,
74 const std::shared_ptr<std::vector<int>> &shape = nullptr)
75 : Data<T>(data, shape) {}
76
77 ~PersistentData() override = default;
78
79 // Initialize storage module.
80 // Custom storage config, you can choose different configurations according to different storage forms,
81 // such as using file storage by configuring the file storage path,
82 // and config can be like this: std::map<std::string, std::string> config = {{kFileStoragePath, "real_path_of_dir"}};
83 void Initialize(const std::map<std::string, std::string> &storage_config);
84
85 // In disaster recovery mode, memory of tensor need to be saved into disk file periodically.
86 void Persist(const storage::DirtyInfo &dirty_info) const;
87
88 // In disaster recovery mode, server node or worker node need to restore persistent data when restart.
89 void Restore() const;
90
91 private:
92 // The following variables are used in disaster recovery mode:
93 // The threads used to execute persistence task.
94 std::thread persist_thread_;
95
96 // The file storage handle used to persist data.
97 std::shared_ptr<storage::StorageBase<int, T>> storage_;
98 };
99
100 template <typename T>
Initialize(const std::map<std::string,std::string> & storage_config)101 void PersistentData<T>::Initialize(const std::map<std::string, std::string> &storage_config) {
102 storage_ = std::make_shared<storage::LocalFile<int, T>>(storage_config);
103 }
104
105 template <typename T>
Persist(const storage::DirtyInfo & dirty_info)106 void PersistentData<T>::Persist(const storage::DirtyInfo &dirty_info) const {
107 MS_EXCEPTION_IF_NULL(storage_);
108 storage::InputData input = std::make_tuple(*Data<T>::shape_, Data<T>::data(), Data<T>::size() * sizeof(T));
109 storage_->Write(input, dirty_info);
110 }
111
112 template <typename T>
Restore()113 void PersistentData<T>::Restore() const {
114 storage::OutputData output = std::make_pair(Data<T>::data(), Data<T>::size() * sizeof(T));
115 MS_EXCEPTION_IF_NULL(storage_);
116 storage_->Read(output);
117 }
118 } // namespace persistent
119 } // namespace distributed
120 } // namespace mindspore
121
122 #endif // MIINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_DATA_H_
123