// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Allows memory-mapping a full file or a specific region within the file.
// It also supports efficiently switching the region being mapped.
//
// Note on Performance:
// It supports different optimized strategies for common patterns on both
// read-only and read-write files. This includes using read-ahead buffers for
// faster reads as well as background-sync vs manual-sync of changes to disk.
// For more details, see comments at MemoryMappedFile::Strategy.
//
// ** Usage 1: pre-mmap large memory and grow the underlying file internally **
//
//   // Create MemoryMappedFile instance.
//   ICING_ASSIGN_OR_RETURN(
//       std::unique_ptr<MemoryMappedFile> mmapped_file,
//       MemoryMappedFile::Create(filesystem, "/file.pb",
//                                READ_WRITE_AUTO_SYNC,
//                                max_file_size,
//                                /*pre_mapping_file_offset=*/0,
//                                /*pre_mapping_mmap_size=*/1024 * 1024));
//
//   // Found that we need 4K bytes for the file and mmapped region.
//   mmapped_file->GrowAndRemapIfNecessary(
//       /*new_file_offset=*/0, /*new_mmap_size=*/4 * 1024);
//   char read_byte = mmapped_file->region()[4000];
//   mmapped_file->mutable_region()[4001] = write_byte;
//
//   mmapped_file->PersistToDisk(); // Optional; immediately writes changes to
//   disk.
//
//   // Found that we need 2048 * 1024 bytes for the file and mmapped region.
45 // mmapped_file->GrowAndRemapIfNecessary( 46 // /*new_file_offset=*/0, /*new_mmap_size=*/2048 * 1024); 47 // mmapped_file->mutable_region()[2000 * 1024] = write_byte; 48 // mmapped_file.reset(); 49 // 50 // ** Usage 2: load by segments ** 51 // 52 // ICING_ASSIGN_OR_RETURN( 53 // std::unique_ptr<MemoryMappedFile> mmapped_file, 54 // MemoryMappedFile::Create(filesystem, "/file.pb", 55 // READ_WRITE_AUTO_SYNC, 56 // max_file_size, 57 // /*pre_mapping_file_offset=*/0, 58 // /*pre_mapping_mmap_size=*/16 * 1024)); 59 // 60 // // load the first 16K. 61 // mmapped_file->GrowAndRemapIfNecessary( 62 // /*new_file_offset=*/0, /*new_mmap_size=*/16 * 1024); 63 // char read_byte = mmapped_file->region()[100]; 64 // mmapped_file->mutable_region()[10] = write_byte; 65 // 66 // mmapped_file->PersistToDisk(); // Optional; immediately writes changes to 67 // disk. 68 // 69 // // load the next 16K. 70 // mmapped_file->GrowAndRemapIfNecessary( 71 // /*new_file_offset=*/16 * 1024, /*new_mmap_size=*/16 * 1024); 72 // mmapped_file->mutable_region()[10] = write_byte; 73 // mmapped_file.reset(); 74 75 #ifndef ICING_FILE_MEMORY_MAPPED_FILE_H_ 76 #define ICING_FILE_MEMORY_MAPPED_FILE_H_ 77 78 #include <unistd.h> 79 80 #include <algorithm> 81 #include <cstdint> 82 #include <memory> 83 #include <string> 84 #include <string_view> 85 86 #include "icing/text_classifier/lib3/utils/base/status.h" 87 #include "icing/text_classifier/lib3/utils/base/statusor.h" 88 #include "icing/file/filesystem.h" 89 90 namespace icing { 91 namespace lib { 92 93 class MemoryMappedFile { 94 public: system_page_size()95 static int64_t __attribute__((const)) system_page_size() { 96 static const int64_t page_size = 97 static_cast<int64_t>(sysconf(_SC_PAGE_SIZE)); 98 return page_size; 99 } 100 101 enum Strategy { 102 // Memory map a read-only file into a read-only memory region. 103 READ_ONLY, 104 105 // Memory map a read-write file into a writable memory region. 
Any changes 106 // made to the region are automatically flushed to the underlying file in 107 // the background. 108 READ_WRITE_AUTO_SYNC, 109 110 // Memory map a read-write file into a writable memory region. Changes made 111 // to this region will never be auto-synced to the underlying file. Unless 112 // the caller explicitly calls PersistToDisk(), all changes will be lost 113 // when the MemoryMappedFile is destroyed. 114 READ_WRITE_MANUAL_SYNC, 115 }; 116 117 // Absolute max file size, 16 GiB. 118 static constexpr int64_t kMaxFileSize = INT64_C(1) << 34; 119 120 // Default max file size, 1 MiB. 121 static constexpr int64_t kDefaultMaxFileSize = INT64_C(1) << 20; 122 123 // Creates a new MemoryMappedFile to read/write content to. 124 // 125 // filesystem : Object to make system level calls 126 // file_path : Full path of the file that needs to be memory-mapped. 127 // mmap_strategy : Strategy/optimizations to access the content. 128 // max_file_size : Maximum file size for MemoryMappedFile, default 129 // kDefaultMaxFileSize. 130 // 131 // Returns: 132 // A MemoryMappedFile instance on success 133 // OUT_OF_RANGE_ERROR if max_file_size is invalid 134 // INTERNAL_ERROR on I/O error 135 static libtextclassifier3::StatusOr<MemoryMappedFile> Create( 136 const Filesystem& filesystem, std::string_view file_path, 137 Strategy mmap_strategy, int64_t max_file_size = kDefaultMaxFileSize); 138 139 // Creates a new MemoryMappedFile to read/write content to. It remaps when 140 // creating the instance, but doesn't check or grow the actual file size, so 141 // the caller should call GrowAndRemapIfNecessary before accessing region. 142 // 143 // filesystem : Object to make system level calls 144 // file_path : Full path of the file that needs to be memory-mapped. 145 // mmap_strategy : Strategy/optimizations to access the content. 146 // max_file_size : Maximum file size for MemoryMappedFile. 147 // pre_mapping_file_offset : The offset of the file to be memory mapped. 
148 // pre_mapping_mmap_size : mmap size for pre-mapping. 149 // 150 // Returns: 151 // A MemoryMappedFile instance on success 152 // OUT_OF_RANGE_ERROR if max_file_size, file_offset, or mmap_size is invalid 153 // INTERNAL_ERROR on I/O error 154 static libtextclassifier3::StatusOr<MemoryMappedFile> Create( 155 const Filesystem& filesystem, std::string_view file_path, 156 Strategy mmap_strategy, int64_t max_file_size, 157 int64_t pre_mapping_file_offset, int64_t pre_mapping_mmap_size); 158 159 // Delete copy constructor and assignment operator. 160 MemoryMappedFile(const MemoryMappedFile& other) = delete; 161 MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete; 162 163 MemoryMappedFile(MemoryMappedFile&& other); 164 MemoryMappedFile& operator=(MemoryMappedFile&& other); 165 166 // Frees any region that is still memory-mapped region. 167 ~MemoryMappedFile(); 168 169 // TODO(b/247671531): migrate all callers to use GrowAndRemapIfNecessary and 170 // deprecate this API. 171 // 172 // Memory-map the newly specified region within the file specified by 173 // file_offset and mmap_size. Unmaps any previously mmapped region. 174 // It doesn't handle the underlying file growth. 175 // 176 // Returns any encountered IO error. 177 libtextclassifier3::Status Remap(int64_t file_offset, int64_t mmap_size); 178 179 // Attempt to memory-map the newly specified region within the file specified 180 // by new_file_offset and new_mmap_size. It handles mmap and file growth 181 // intelligently. 182 // - Compute least file size needed according to new_file_offset and 183 // new_mmap_size, and compare with the current file size. If requiring file 184 // growth, then grow the underlying file (Write) or return error if 185 // strategy_ is READ_ONLY. 186 // - If new_file_offset is different from the current file_offset_ or 187 // new_mmap_size is greater than the current mmap_size_, then memory-map 188 // the newly specified region and unmap any previously mmapped region. 
189 // 190 // This API is useful for file growth since it grows the underlying file 191 // internally and handles remapping intelligently. By pre-mmapping a large 192 // memory, we only need to grow the underlying file (Write) without remapping 193 // in each round of growth, which significantly reduces the cost of system 194 // call and memory paging after remap. 195 // 196 // Returns: 197 // OK on success 198 // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid 199 // Any error from GrowFileSize() and RemapImpl() 200 libtextclassifier3::Status GrowAndRemapIfNecessary(int64_t new_file_offset, 201 int64_t new_mmap_size); 202 203 // unmap and free-up the region that has currently been memory mapped. 204 void Unmap(); 205 206 // Explicitly persist any changes made to the currently mapped region to disk. 207 // 208 // NOTE: This is only valid if Strategy=READ_WRITE was used. 209 // 210 // Returns: 211 // OK on success 212 // INTERNAL on I/O error 213 // FAILED_PRECONDITION if Strategy is not implemented 214 libtextclassifier3::Status PersistToDisk(); 215 216 // Advise the system to help it optimize the memory-mapped region for 217 // upcoming read/write operations. 218 // 219 // NOTE: See linux documentation of madvise() for additional details. 220 enum AccessPattern { 221 // Future memory access are expected to be in random order. So, readhead 222 // will have limited impact on latency. 223 ACCESS_RANDOM, 224 225 // Future memory access are expected to be sequential. So, some readahead 226 // can greatly improve latency. 227 ACCESS_SEQUENTIAL, 228 229 // Future memory access is expected to be high-volume and all over the file. 230 // So, preloading the whole region into memory would greatly improve 231 // latency. 232 ACCESS_ALL, 233 234 // Future memory access is expected to be rare. So, it is best to free up 235 // as much of preloaded memory as possible. 
236 ACCESS_NONE, 237 }; 238 libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern); 239 strategy()240 Strategy strategy() const { return strategy_; } 241 max_file_size()242 int64_t max_file_size() const { return max_file_size_; } 243 244 // Accessors to the memory-mapped region. Returns null if nothing is mapped. region()245 const char* region() const { 246 return reinterpret_cast<const char*>(mmap_result_) + alignment_adjustment_; 247 } mutable_region()248 char* mutable_region() { 249 return reinterpret_cast<char*>(mmap_result_) + alignment_adjustment_; 250 } 251 file_offset()252 int64_t file_offset() const { return file_offset_; } 253 254 // TODO(b/247671531): remove this API after migrating all callers to use 255 // GrowAndRemapIfNecessary. region_size()256 int64_t region_size() const { return mmap_size_; } 257 258 // The size that is safe for the client to read/write. This is only valid for 259 // callers that use GrowAndRemapIfNecessary. available_size()260 int64_t available_size() const { 261 return std::min(mmap_size_, 262 std::max(INT64_C(0), file_size_ - file_offset_)); 263 } 264 265 private: 266 explicit MemoryMappedFile(const Filesystem& filesystem, 267 std::string_view file_path, Strategy mmap_strategy, 268 int64_t max_file_size, int64_t file_size); 269 270 // Grow the underlying file to new_file_size. 271 // Note: it is possible that Write() (implemented in the file system call 272 // library) grows the underlying file partially and returns error due to 273 // failures, so the cached file_size_ may contain out-of-date value, but it is 274 // still guaranteed that file_size_ is always smaller or equal to the actual 275 // file size. In the next round of growing: 276 // - If new_file_size is not greater than file_size_, then we're still 277 // confident that the actual file size is large enough and therefore skip 278 // the grow process. 
279 // - If new_file_size is greater than file_size_, then we will invoke the 280 // system call to sync the actual file size. At this moment, file_size_ is 281 // the actual file size and therefore we can grow the underlying file size 282 // correctly. 283 // 284 // Returns: 285 // OK on success 286 // FAILED_PRECONDITION_ERROR if requiring file growth and strategy_ is 287 // READ_ONLY 288 // OUT_OF_RANGE_ERROR if new_mmap_size exceeds max_file_size_ 289 // INTERNAL_ERROR on I/O error 290 libtextclassifier3::Status GrowFileSize(int64_t new_file_size); 291 292 // Memory-map the newly specified region within the file specified by 293 // new_file_offset and new_mmap_size. Unmaps any previously mmapped region. 294 // It doesn't handle the underlying file growth. 295 // 296 // Returns: 297 // OK on success 298 // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid 299 // INTERNAL_ERROR on I/O error 300 libtextclassifier3::Status RemapImpl(int64_t new_file_offset, 301 int64_t new_mmap_size); 302 303 // Swaps the contents of this with other. 304 void Swap(MemoryMappedFile* other); 305 adjusted_offset()306 int64_t adjusted_offset() const { 307 return file_offset_ - alignment_adjustment_; 308 } 309 adjusted_mmap_size()310 int64_t adjusted_mmap_size() const { 311 return alignment_adjustment_ + mmap_size_; 312 } 313 314 // Cached constructor params. 315 const Filesystem* filesystem_; 316 std::string file_path_; 317 Strategy strategy_; 318 319 // Raw file related fields: 320 // - max_file_size_ 321 // - file_size_ 322 323 // Max file size for MemoryMappedFile. It should not exceed the absolute max 324 // size of memory mapped file (kMaxFileSize). It is only used in 325 // GrowAndRemapIfNecessary(), the new API that handles underlying file growth 326 // internally and remaps intelligently. 327 // 328 // Note: max_file_size_ will be specified in runtime and the caller should 329 // make sure its value is correct and reasonable. 
330 int64_t max_file_size_; 331 332 // Cached file size to avoid calling system call too frequently. It is only 333 // used in GrowAndRemapIfNecessary(), the new API that handles underlying file 334 // growth internally and remaps intelligently. 335 // 336 // Note: it is guaranteed that file_size_ is smaller or equal to the actual 337 // file size as long as the underlying file hasn't been truncated or deleted 338 // externally. See GrowFileSize() for more details. 339 int64_t file_size_; 340 341 // Memory mapped related fields: 342 // - mmap_result_ 343 // - file_offset_ 344 // - alignment_adjustment_ 345 // - mmap_size_ 346 347 // Raw pointer (or error) returned by calls to mmap(). 348 void* mmap_result_; 349 350 // Offset within the file at which the current memory-mapped region starts. 351 int64_t file_offset_; 352 353 // Size that is currently memory-mapped. 354 // Note that the mmapped size can be larger than the underlying file size. We 355 // can reduce remapping by pre-mmapping a large memory and grow the file size 356 // later. See GrowAndRemapIfNecessary(). 357 int64_t mmap_size_; 358 359 // The difference between file_offset_ and the actual adjusted (aligned) 360 // offset. 361 // Since mmap requires the offset to be a multiple of system page size, we 362 // have to align file_offset_ to the last multiple of system page size. 363 int64_t alignment_adjustment_; 364 365 // E.g. 
system_page_size = 5, RemapImpl(/*new_file_offset=*/8, mmap_size) 366 // 367 // File layout: xxxxx xxxxx xxxxx xxxxx xxxxx xx 368 // file_offset_: 8 369 // adjusted_offset(): 5 370 // region()/mutable_region(): | 371 // mmap_result_: | 372 // 373 // alignment_adjustment_: file_offset_ - adjusted_offset() 374 // mmap_size_: mmap_size 375 // region_size(): mmap_size_ 376 // available_size(): std::min(mmap_size_, 377 // std::max(0, file_size_ - file_offset_)) 378 // region_range: [file_offset_, file_offset + mmap_size) 379 // adjusted_mmap_size(): alignment_adjustment_ + mmap_size_ 380 // adjusted_mmap_range: [alignment_offset, file_offset + mmap_size) 381 }; 382 383 } // namespace lib 384 } // namespace icing 385 386 #endif // ICING_FILE_MEMORY_MAPPED_FILE_H_ 387