1 /** 2 * Copyright 2021-2022 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_STORAGE_LOCAL_FILE_H_ 18 #define MINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_STORAGE_LOCAL_FILE_H_ 19 20 #include <map> 21 #include <memory> 22 #include <string> 23 #include <vector> 24 #include <utility> 25 26 #include "include/backend/distributed/persistent/storage/storage.h" 27 #include "distributed/persistent/storage/block.h" 28 #include "distributed/persistent/storage/file_io_utils.h" 29 #include "distributed/persistent/storage/constants.h" 30 #include "utils/system/file_system.h" 31 32 namespace mindspore { 33 namespace distributed { 34 namespace storage { 35 // The default maximum block length : 128MB. 36 constexpr size_t DEFAULT_MAX_BLOCK_LENGTH = 128 << 20; 37 38 // File type persistence storage implementation class. 39 template <typename KeyType = int32_t, typename ValueType = float> 40 class LocalFile : public StorageBase<KeyType, ValueType> { 41 public: 42 explicit LocalFile(const std::map<std::string, std::string> &storage_config); 43 ~LocalFile() override; 44 45 // Initialize local file storage, such as creating file system handle and check data legitimacy. 46 void Initialize() override; 47 48 // Release the resource used by the local file storage. 49 void Finalize() override; 50 51 // The following two methods are override version function for Write: 52 // 1. Create blocks and block metas. 53 // 2. Write input data to block files and Generate sha256 sequence for every block file. 54 // Write the entire blob data of tensor to the block files on disk: 55 void Write(const InputData &input, const DirtyInfo &dirty_info) override; 56 // Write the entire blob data composed of multiple tensors to the block files on disk: 57 void Write(const std::vector<InputData> &inputs, const DirtyInfo &dirty_info) override; 58 59 // Write key-value pairs data into local file storage. 60 // Parameter[in] `keys`: The keys need to write, containing data pointer and data buffer length. 61 // Parameter[in] `values`: The values corresponding to keys need to write, containing data pointer and data buffer 62 // length. 63 void Write(const ConstDataWithLen &keys, const ConstDataWithLen &values) override; 64 65 // The following two methods are override version function for Read: 66 // 1.Tamper proof check. 67 // 2.Read all block files and merge them into contiguous memory. 68 // Read data from all block files in file_path_(dir): 69 void Read(const OutputData &output) override; 70 // Read data from all block files in file_path_(dir) for multiple tensors. 71 void Read(const std::vector<OutputData> &outputs) override; 72 73 // Read key-value pairs' values data from local file storage. 74 // Parameter[in] `keys`: The keys whose values need to read, containing data pointer and data buffer length. 75 // Parameter[out] `values`: The values corresponding to keys need to read, containing data pointer and data buffer 76 // length. 77 void Read(const ConstDataWithLen &keys, const DataWithLen &values) override; 78 79 // Dump all keys of all key-value pairs in storage. 80 std::unique_ptr<std::vector<KeyType>> GetAllKeys() const override; 81 82 private: 83 // Create blocks and block metas and write input data to block files. 84 void WriteBlockFiles(const std::vector<InputData> &inputs); 85 86 // Write shardding data to one specific block file by block index and generate sha256. 87 void WriteOneBlockFile(size_t block_index, const std::vector<InputData> &inputs) const; 88 89 // Obtain the corresponding file block index according to dirty info, only need to rewrite these file blocks, and 90 // dirty info needs to be sorted in ascending order. 91 void TransformDirtyInfoToBlockIndices(const DirtyInfo &dirty_info, std::vector<int> *block_indices) const; 92 93 // Load file list info of block files and block meta files in the 'file_path_' to block list and block meta list. 94 bool LoadBlocksInfo(); 95 96 // The local file is composed of many block files, and each block file corresponds to a Block object in memory. 97 std::vector<std::shared_ptr<Block>> block_list_; 98 99 // Container used to store meta info for every block in member variable 'block_list_', meta info can be customized, 100 // such as shard shape, shard range, field length, etc. 101 std::vector<std::shared_ptr<BlockMeta>> block_meta_list_; 102 103 // Folder path to save all block files. 104 std::string file_path_; 105 106 // Maximum size of each block file. 107 size_t max_block_length_; 108 109 // Indicates whether block files has been created. 110 bool finish_create_block_files_{false}; 111 112 // File system of create or delete file. 113 std::shared_ptr<system::FileSystem> fs_; 114 115 // All write-read helper for all block files. 116 std::vector<system::WriteFilePtr> block_files_; 117 118 // For key-value data storage, the value size (such as the number of floating values)for one key-value pair. 119 size_t element_size_; 120 121 // The number of elements that a block can hold. 122 size_t block_size_{1}; 123 124 // Record all key-value pairs' positions in block files, You can query which block file the value corresponding to a 125 // key is stored in, and the offset location of the block file. 126 // Data structure for this map: key -> pair{block index, offset in block}, offset in block is measured in bytes from 127 // the beginning of this file. 128 HashMap<KeyType, std::pair<size_t, size_t>> keys_to_locations_; 129 130 // Record latest used position in latest created block file. 131 size_t current_offset_in_block_{0}; 132 }; 133 } // namespace storage 134 } // namespace distributed 135 } // namespace mindspore 136 137 #endif // MINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_STORAGE_LOCAL_FILE_H_ 138