• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Allows memory-mapping a full file or a specific region within the file.
16 // It also supports efficiently switching the region being mapped.
17 //
18 // Note on Performance:
19 // It supports different optimized strategies for common patterns on both
20 // read-only and read-write files. This includes using read-ahead buffers for
21 // faster reads as well as background-sync vs manual-sync of changes to disk.
22 // For more details, see comments at MemoryMappedFile::Strategy.
23 //
24 // ** Usage 1: pre-mmap large memory and grow the underlying file internally **
25 //
// // Create MemoryMappedFile instance. Create() returns the instance by value
// // (wrapped in a StatusOr), not a unique_ptr.
// ICING_ASSIGN_OR_RETURN(
//     MemoryMappedFile mmapped_file,
//     MemoryMappedFile::Create(filesystem, "/file.pb",
//                              MemoryMappedFile::READ_WRITE_AUTO_SYNC,
//                              max_file_size,
//                              /*pre_mapping_file_offset=*/0,
//                              /*pre_mapping_mmap_size=*/1024 * 1024));
//
// // Found that we need 4K bytes for the file and mmapped region.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/0, /*new_mmap_size=*/4 * 1024);
// char read_byte = mmapped_file.region()[4000];
// mmapped_file.mutable_region()[4001] = write_byte;
//
// mmapped_file.PersistToDisk(); // Optional; immediately writes changes to
//                               // disk.
//
// // Found that we need 2048 * 1024 bytes for the file and mmapped region.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/0, /*new_mmap_size=*/2048 * 1024);
// mmapped_file.mutable_region()[2000 * 1024] = write_byte;
// mmapped_file.Unmap(); // Optional; the destructor also frees the region.
49 //
50 // ** Usage 2: load by segments **
51 //
// // Create() returns the instance by value (wrapped in a StatusOr), not a
// // unique_ptr.
// ICING_ASSIGN_OR_RETURN(
//     MemoryMappedFile mmapped_file,
//     MemoryMappedFile::Create(filesystem, "/file.pb",
//                              MemoryMappedFile::READ_WRITE_AUTO_SYNC,
//                              max_file_size,
//                              /*pre_mapping_file_offset=*/0,
//                              /*pre_mapping_mmap_size=*/16 * 1024));
//
// // load the first 16K.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/0, /*new_mmap_size=*/16 * 1024);
// char read_byte = mmapped_file.region()[100];
// mmapped_file.mutable_region()[10] = write_byte;
//
// mmapped_file.PersistToDisk(); // Optional; immediately writes changes to
//                               // disk.
//
// // load the next 16K.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/16 * 1024, /*new_mmap_size=*/16 * 1024);
// mmapped_file.mutable_region()[10] = write_byte;
// mmapped_file.Unmap(); // Optional; the destructor also frees the region.
74 
75 #ifndef ICING_FILE_MEMORY_MAPPED_FILE_H_
76 #define ICING_FILE_MEMORY_MAPPED_FILE_H_
77 
78 #include <unistd.h>
79 
80 #include <algorithm>
81 #include <cstdint>
82 #include <memory>
83 #include <string>
84 #include <string_view>
85 
86 #include "icing/text_classifier/lib3/utils/base/status.h"
87 #include "icing/text_classifier/lib3/utils/base/statusor.h"
88 #include "icing/file/filesystem.h"
89 
90 namespace icing {
91 namespace lib {
92 
93 class MemoryMappedFile {
94  public:
system_page_size()95   static int64_t __attribute__((const)) system_page_size() {
96     static const int64_t page_size =
97         static_cast<int64_t>(sysconf(_SC_PAGE_SIZE));
98     return page_size;
99   }
100 
101   enum Strategy {
102     // Memory map a read-only file into a read-only memory region.
103     READ_ONLY,
104 
105     // Memory map a read-write file into a writable memory region. Any changes
106     // made to the region are automatically flushed to the underlying file in
107     // the background.
108     READ_WRITE_AUTO_SYNC,
109 
110     // Memory map a read-write file into a writable memory region. Changes made
111     // to this region will never be auto-synced to the underlying file. Unless
112     // the caller explicitly calls PersistToDisk(), all changes will be lost
113     // when the MemoryMappedFile is destroyed.
114     READ_WRITE_MANUAL_SYNC,
115   };
116 
117   // Absolute max file size, 16 GiB.
118   static constexpr int64_t kMaxFileSize = INT64_C(1) << 34;
119 
120   // Default max file size, 1 MiB.
121   static constexpr int64_t kDefaultMaxFileSize = INT64_C(1) << 20;
122 
123   // Creates a new MemoryMappedFile to read/write content to.
124   //
125   // filesystem    : Object to make system level calls
126   // file_path     : Full path of the file that needs to be memory-mapped.
127   // mmap_strategy : Strategy/optimizations to access the content.
128   // max_file_size : Maximum file size for MemoryMappedFile, default
129   //                 kDefaultMaxFileSize.
130   //
131   // Returns:
132   //   A MemoryMappedFile instance on success
133   //   OUT_OF_RANGE_ERROR if max_file_size is invalid
134   //   INTERNAL_ERROR on I/O error
135   static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
136       const Filesystem& filesystem, std::string_view file_path,
137       Strategy mmap_strategy, int64_t max_file_size = kDefaultMaxFileSize);
138 
139   // Creates a new MemoryMappedFile to read/write content to. It remaps when
140   // creating the instance, but doesn't check or grow the actual file size, so
141   // the caller should call GrowAndRemapIfNecessary before accessing region.
142   //
143   // filesystem    : Object to make system level calls
144   // file_path     : Full path of the file that needs to be memory-mapped.
145   // mmap_strategy : Strategy/optimizations to access the content.
146   // max_file_size : Maximum file size for MemoryMappedFile.
147   // pre_mapping_file_offset : The offset of the file to be memory mapped.
148   // pre_mapping_mmap_size   : mmap size for pre-mapping.
149   //
150   // Returns:
151   //   A MemoryMappedFile instance on success
152   //   OUT_OF_RANGE_ERROR if max_file_size, file_offset, or mmap_size is invalid
153   //   INTERNAL_ERROR on I/O error
154   static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
155       const Filesystem& filesystem, std::string_view file_path,
156       Strategy mmap_strategy, int64_t max_file_size,
157       int64_t pre_mapping_file_offset, int64_t pre_mapping_mmap_size);
158 
159   // Delete copy constructor and assignment operator.
160   MemoryMappedFile(const MemoryMappedFile& other) = delete;
161   MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete;
162 
163   MemoryMappedFile(MemoryMappedFile&& other);
164   MemoryMappedFile& operator=(MemoryMappedFile&& other);
165 
166   // Frees any region that is still memory-mapped region.
167   ~MemoryMappedFile();
168 
169   // TODO(b/247671531): migrate all callers to use GrowAndRemapIfNecessary and
170   // deprecate this API.
171   //
172   // Memory-map the newly specified region within the file specified by
173   // file_offset and mmap_size. Unmaps any previously mmapped region.
174   // It doesn't handle the underlying file growth.
175   //
176   // Returns any encountered IO error.
177   libtextclassifier3::Status Remap(int64_t file_offset, int64_t mmap_size);
178 
179   // Attempt to memory-map the newly specified region within the file specified
180   // by new_file_offset and new_mmap_size. It handles mmap and file growth
181   // intelligently.
182   // - Compute least file size needed according to new_file_offset and
183   //   new_mmap_size, and compare with the current file size. If requiring file
184   //   growth, then grow the underlying file (Write) or return error if
185   //   strategy_ is READ_ONLY.
186   // - If new_file_offset is different from the current file_offset_ or
187   //   new_mmap_size is greater than the current mmap_size_, then memory-map
188   //   the newly specified region and unmap any previously mmapped region.
189   //
190   // This API is useful for file growth since it grows the underlying file
191   // internally and handles remapping intelligently. By pre-mmapping a large
192   // memory, we only need to grow the underlying file (Write) without remapping
193   // in each round of growth, which significantly reduces the cost of system
194   // call and memory paging after remap.
195   //
196   // Returns:
197   //   OK on success
198   //   OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid
199   //   Any error from GrowFileSize() and RemapImpl()
200   libtextclassifier3::Status GrowAndRemapIfNecessary(int64_t new_file_offset,
201                                                      int64_t new_mmap_size);
202 
203   // unmap and free-up the region that has currently been memory mapped.
204   void Unmap();
205 
206   // Explicitly persist any changes made to the currently mapped region to disk.
207   //
208   // NOTE: This is only valid if Strategy=READ_WRITE was used.
209   //
210   // Returns:
211   //   OK on success
212   //   INTERNAL on I/O error
213   //   FAILED_PRECONDITION if Strategy is not implemented
214   libtextclassifier3::Status PersistToDisk();
215 
216   // Advise the system to help it optimize the memory-mapped region for
217   // upcoming read/write operations.
218   //
219   // NOTE: See linux documentation of madvise() for additional details.
220   enum AccessPattern {
221     // Future memory access are expected to be in random order. So, readhead
222     // will have limited impact on latency.
223     ACCESS_RANDOM,
224 
225     // Future memory access are expected to be sequential. So, some readahead
226     // can greatly improve latency.
227     ACCESS_SEQUENTIAL,
228 
229     // Future memory access is expected to be high-volume and all over the file.
230     // So, preloading the whole region into memory would greatly improve
231     // latency.
232     ACCESS_ALL,
233 
234     // Future memory access is expected to be rare. So, it is best to free up
235     // as much of preloaded memory as possible.
236     ACCESS_NONE,
237   };
238   libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern);
239 
strategy()240   Strategy strategy() const { return strategy_; }
241 
max_file_size()242   int64_t max_file_size() const { return max_file_size_; }
243 
244   // Accessors to the memory-mapped region. Returns null if nothing is mapped.
region()245   const char* region() const {
246     return reinterpret_cast<const char*>(mmap_result_) + alignment_adjustment_;
247   }
mutable_region()248   char* mutable_region() {
249     return reinterpret_cast<char*>(mmap_result_) + alignment_adjustment_;
250   }
251 
file_offset()252   int64_t file_offset() const { return file_offset_; }
253 
254   // TODO(b/247671531): remove this API after migrating all callers to use
255   //                    GrowAndRemapIfNecessary.
region_size()256   int64_t region_size() const { return mmap_size_; }
257 
258   // The size that is safe for the client to read/write. This is only valid for
259   // callers that use GrowAndRemapIfNecessary.
available_size()260   int64_t available_size() const {
261     return std::min(mmap_size_,
262                     std::max(INT64_C(0), file_size_ - file_offset_));
263   }
264 
265  private:
266   explicit MemoryMappedFile(const Filesystem& filesystem,
267                             std::string_view file_path, Strategy mmap_strategy,
268                             int64_t max_file_size, int64_t file_size);
269 
270   // Grow the underlying file to new_file_size.
271   // Note: it is possible that Write() (implemented in the file system call
272   // library) grows the underlying file partially and returns error due to
273   // failures, so the cached file_size_ may contain out-of-date value, but it is
274   // still guaranteed that file_size_ is always smaller or equal to the actual
275   // file size. In the next round of growing:
276   // - If new_file_size is not greater than file_size_, then we're still
277   //   confident that the actual file size is large enough and therefore skip
278   //   the grow process.
279   // - If new_file_size is greater than file_size_, then we will invoke the
280   //   system call to sync the actual file size. At this moment, file_size_ is
281   //   the actual file size and therefore we can grow the underlying file size
282   //   correctly.
283   //
284   // Returns:
285   //   OK on success
286   //   FAILED_PRECONDITION_ERROR if requiring file growth and strategy_ is
287   //                             READ_ONLY
288   //   OUT_OF_RANGE_ERROR if new_mmap_size exceeds max_file_size_
289   //   INTERNAL_ERROR on I/O error
290   libtextclassifier3::Status GrowFileSize(int64_t new_file_size);
291 
292   // Memory-map the newly specified region within the file specified by
293   // new_file_offset and new_mmap_size. Unmaps any previously mmapped region.
294   // It doesn't handle the underlying file growth.
295   //
296   // Returns:
297   //   OK on success
298   //   OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid
299   //   INTERNAL_ERROR on I/O error
300   libtextclassifier3::Status RemapImpl(int64_t new_file_offset,
301                                        int64_t new_mmap_size);
302 
303   // Swaps the contents of this with other.
304   void Swap(MemoryMappedFile* other);
305 
adjusted_offset()306   int64_t adjusted_offset() const {
307     return file_offset_ - alignment_adjustment_;
308   }
309 
adjusted_mmap_size()310   int64_t adjusted_mmap_size() const {
311     return alignment_adjustment_ + mmap_size_;
312   }
313 
314   // Cached constructor params.
315   const Filesystem* filesystem_;
316   std::string file_path_;
317   Strategy strategy_;
318 
319   // Raw file related fields:
320   // - max_file_size_
321   // - file_size_
322 
323   // Max file size for MemoryMappedFile. It should not exceed the absolute max
324   // size of memory mapped file (kMaxFileSize). It is only used in
325   // GrowAndRemapIfNecessary(), the new API that handles underlying file growth
326   // internally and remaps intelligently.
327   //
328   // Note: max_file_size_ will be specified in runtime and the caller should
329   // make sure its value is correct and reasonable.
330   int64_t max_file_size_;
331 
332   // Cached file size to avoid calling system call too frequently. It is only
333   // used in GrowAndRemapIfNecessary(), the new API that handles underlying file
334   // growth internally and remaps intelligently.
335   //
336   // Note: it is guaranteed that file_size_ is smaller or equal to the actual
337   // file size as long as the underlying file hasn't been truncated or deleted
338   // externally. See GrowFileSize() for more details.
339   int64_t file_size_;
340 
341   // Memory mapped related fields:
342   // - mmap_result_
343   // - file_offset_
344   // - alignment_adjustment_
345   // - mmap_size_
346 
347   // Raw pointer (or error) returned by calls to mmap().
348   void* mmap_result_;
349 
350   // Offset within the file at which the current memory-mapped region starts.
351   int64_t file_offset_;
352 
353   // Size that is currently memory-mapped.
354   // Note that the mmapped size can be larger than the underlying file size. We
355   // can reduce remapping by pre-mmapping a large memory and grow the file size
356   // later. See GrowAndRemapIfNecessary().
357   int64_t mmap_size_;
358 
359   // The difference between file_offset_ and the actual adjusted (aligned)
360   // offset.
361   // Since mmap requires the offset to be a multiple of system page size, we
362   // have to align file_offset_ to the last multiple of system page size.
363   int64_t alignment_adjustment_;
364 
365   // E.g. system_page_size = 5, RemapImpl(/*new_file_offset=*/8, mmap_size)
366   //
367   // File layout:               xxxxx xxxxx xxxxx xxxxx xxxxx xx
368   // file_offset_:                       8
369   // adjusted_offset():               5
370   // region()/mutable_region():          |
371   // mmap_result_:                    |
372   //
373   // alignment_adjustment_: file_offset_ - adjusted_offset()
374   // mmap_size_:            mmap_size
375   // region_size():         mmap_size_
376   // available_size():      std::min(mmap_size_,
377   //                                 std::max(0, file_size_ - file_offset_))
378   // region_range:          [file_offset_, file_offset + mmap_size)
379   // adjusted_mmap_size():  alignment_adjustment_ + mmap_size_
380   // adjusted_mmap_range:   [alignment_offset, file_offset + mmap_size)
381 };
382 
383 }  // namespace lib
384 }  // namespace icing
385 
386 #endif  // ICING_FILE_MEMORY_MAPPED_FILE_H_
387