• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_STORAGE_LOCAL_FILE_H_
18 #define MINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_STORAGE_LOCAL_FILE_H_
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 #include <utility>
25 
26 #include "include/backend/distributed/persistent/storage/storage.h"
27 #include "distributed/persistent/storage/block.h"
28 #include "distributed/persistent/storage/file_io_utils.h"
29 #include "distributed/persistent/storage/constants.h"
30 #include "utils/system/file_system.h"
31 
32 namespace mindspore {
33 namespace distributed {
34 namespace storage {
35 // The default maximum block length : 128MB.
36 constexpr size_t DEFAULT_MAX_BLOCK_LENGTH = 128 << 20;
37 
38 // File type persistence storage implementation class.
39 template <typename KeyType = int32_t, typename ValueType = float>
40 class LocalFile : public StorageBase<KeyType, ValueType> {
41  public:
42   explicit LocalFile(const std::map<std::string, std::string> &storage_config);
43   ~LocalFile() override;
44 
45   // Initialize local file storage, such as creating file system handle and check data legitimacy.
46   void Initialize() override;
47 
48   // Release the resource used by the local file storage.
49   void Finalize() override;
50 
51   // The following two methods are override version function for Write:
52   // 1. Create blocks and block metas.
53   // 2. Write input data to block files and Generate sha256 sequence for every block file.
54   // Write the entire blob data of tensor to the block files on disk:
55   void Write(const InputData &input, const DirtyInfo &dirty_info) override;
56   // Write the entire blob data composed of multiple tensors to the block files on disk:
57   void Write(const std::vector<InputData> &inputs, const DirtyInfo &dirty_info) override;
58 
59   // Write key-value pairs data into local file storage.
60   // Parameter[in] `keys`: The keys need to write, containing data pointer and data buffer length.
61   // Parameter[in] `values`: The values corresponding to keys need to write, containing data pointer and data buffer
62   // length.
63   void Write(const ConstDataWithLen &keys, const ConstDataWithLen &values) override;
64 
65   // The following two methods are override version function for Read:
66   // 1.Tamper proof check.
67   // 2.Read all block files and merge them into contiguous memory.
68   // Read data from all block files in file_path_(dir):
69   void Read(const OutputData &output) override;
70   // Read data from all block files in file_path_(dir) for multiple tensors.
71   void Read(const std::vector<OutputData> &outputs) override;
72 
73   // Read key-value pairs' values data from local file storage.
74   // Parameter[in] `keys`: The keys whose values need to read, containing data pointer and data buffer length.
75   // Parameter[out] `values`: The values corresponding to keys need to read, containing data pointer and data buffer
76   // length.
77   void Read(const ConstDataWithLen &keys, const DataWithLen &values) override;
78 
79   // Dump all keys of all key-value pairs in storage.
80   std::unique_ptr<std::vector<KeyType>> GetAllKeys() const override;
81 
82  private:
83   // Create blocks and block metas and write input data to block files.
84   void WriteBlockFiles(const std::vector<InputData> &inputs);
85 
86   // Write shardding data to one specific block file by block index and generate sha256.
87   void WriteOneBlockFile(size_t block_index, const std::vector<InputData> &inputs) const;
88 
89   // Obtain the corresponding file block index according to dirty info, only need to rewrite these file blocks, and
90   // dirty info needs to be sorted in ascending order.
91   void TransformDirtyInfoToBlockIndices(const DirtyInfo &dirty_info, std::vector<int> *block_indices) const;
92 
93   // Load file list info of block files and block meta files in the 'file_path_' to block list and block meta list.
94   bool LoadBlocksInfo();
95 
96   // The local file is composed of many block files, and each block file corresponds to a Block object in memory.
97   std::vector<std::shared_ptr<Block>> block_list_;
98 
99   // Container used to store meta info for every block in member variable 'block_list_', meta info can be customized,
100   // such as shard shape, shard range, field length, etc.
101   std::vector<std::shared_ptr<BlockMeta>> block_meta_list_;
102 
103   // Folder path to save all block files.
104   std::string file_path_;
105 
106   // Maximum size of each block file.
107   size_t max_block_length_;
108 
109   // Indicates whether block files has been created.
110   bool finish_create_block_files_{false};
111 
112   // File system of create or delete file.
113   std::shared_ptr<system::FileSystem> fs_;
114 
115   // All write-read helper for all block files.
116   std::vector<system::WriteFilePtr> block_files_;
117 
118   // For key-value data storage, the value size (such as the number of floating values)for one key-value pair.
119   size_t element_size_;
120 
121   // The number of elements that a block can hold.
122   size_t block_size_{1};
123 
124   // Record all key-value pairs' positions in block files, You can query which block file the value corresponding to a
125   // key is stored in, and the offset location of the block file.
126   // Data structure for this map: key -> pair{block index, offset in block}, offset in block is measured in bytes from
127   // the beginning of this file.
128   HashMap<KeyType, std::pair<size_t, size_t>> keys_to_locations_;
129 
130   // Record latest used position in latest created block file.
131   size_t current_offset_in_block_{0};
132 };
133 }  // namespace storage
134 }  // namespace distributed
135 }  // namespace mindspore
136 
137 #endif  // MINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_STORAGE_LOCAL_FILE_H_
138