• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #![deny(missing_docs)]
6 
7 use std::fs::File;
8 use std::ops::Range;
9 use std::os::unix::fs::FileExt;
10 
11 use base::error;
12 use base::linux::MemoryMappingUnix;
13 use base::MemoryMapping;
14 use base::MemoryMappingBuilder;
15 use base::MmapError;
16 use base::Protection;
17 use base::VolatileMemory;
18 use base::VolatileMemoryError;
19 use base::VolatileSlice;
20 use thiserror::Error as ThisError;
21 
22 use crate::pagesize::bytes_to_pages;
23 use crate::pagesize::is_page_aligned;
24 use crate::pagesize::pages_to_bytes;
25 
/// Result type for swap file operations in this module.
pub type Result<T> = std::result::Result<T, Error>;

// On 4KB page size system, guest memory must be less than 8 TiB which is reasonable assumption.
const MAX_PAGE_IDX: usize = (1 << 31) - 2;
30 
/// Errors for swap file operations.
#[derive(ThisError, Debug)]
pub enum Error {
    /// Failed on file I/O.
    #[error("failed to io: {0}")]
    Io(#[from] std::io::Error),
    /// Failed on an mmap-related operation; the first field names the operation (e.g. "mlock").
    #[error("failed to mmap operation ({0}): {1}")]
    Mmap(&'static str, MmapError),
    /// Failed on a volatile memory operation.
    #[error("failed to volatile memory operation: {0}")]
    VolatileMemory(#[from] VolatileMemoryError),
    /// The index is out of the region.
    #[error("index is out of range")]
    OutOfRange,
    /// The data size is invalid.
    #[error("data size is invalid")]
    InvalidSize,
    /// The index is invalid.
    #[error("index is invalid")]
    InvalidIndex,
}
46 
47 /// u32 to pack the state of a page on the file.
48 ///
49 /// * MSB: Whether the page on file is freed. (1: freed, 2: allocated)
50 /// * lower 31 bits:
51 ///   * The corresponding page index if the file page is allocated.
52 ///   * The file page index + 1 of next freed file page if the file page is freed. Zero means it is
53 ///     the last page in the free list.
54 #[derive(Debug)]
55 struct FilePageState(u32);
56 
57 impl FilePageState {
58     const FREED_BIT_MASK: u32 = 1 << 31;
59 
freed_state(first_freed_page: Option<usize>) -> Self60     fn freed_state(first_freed_page: Option<usize>) -> Self {
61         Self(
62             Self::FREED_BIT_MASK
63                 | first_freed_page
64                     .map(|idx_file| idx_file as u32 + 1)
65                     .unwrap_or(0),
66         )
67     }
68 
allocated_state(idx_page: usize) -> Option<Self>69     fn allocated_state(idx_page: usize) -> Option<Self> {
70         if idx_page <= MAX_PAGE_IDX {
71             Some(Self(idx_page as u32))
72         } else {
73             // idx_page is invalid.
74             None
75         }
76     }
77 
is_freed(&self) -> bool78     fn is_freed(&self) -> bool {
79         self.0 & Self::FREED_BIT_MASK != 0
80     }
81 
82     /// This is valid only if the page is freed.
next_file_freed_idx(&self) -> Option<Option<usize>>83     fn next_file_freed_idx(&self) -> Option<Option<usize>> {
84         if self.is_freed() {
85             let next_idx_file = !Self::FREED_BIT_MASK & self.0;
86             if next_idx_file == 0 {
87                 Some(None)
88             } else {
89                 Some(Some(next_idx_file as usize - 1))
90             }
91         } else {
92             None
93         }
94     }
95 
96     /// This is valid only if the page is allocated.
idx_page(&self) -> Option<usize>97     fn idx_page(&self) -> Option<usize> {
98         if self.is_freed() {
99             // The file page is freed.
100             None
101         } else {
102             Some(self.0 as usize)
103         }
104     }
105 }
106 
/// Tracks the allocated/freed state of every page in the swap file.
#[derive(Debug)]
struct FilePageStates {
    /// Freed pages on the swap file are managed in a free list. `first_idx_file_freed` points to
    /// the first page index in the list.
    first_idx_file_freed: Option<usize>,
    /// State of each file page, indexed by file page index.
    states: Vec<FilePageState>,
}
114 
impl FilePageStates {
    /// Creates an empty state list with room reserved for `capacity` pages.
    fn new(capacity: usize) -> Self {
        FilePageStates {
            first_idx_file_freed: None,
            states: Vec::with_capacity(capacity),
        }
    }

    /// The number of file pages tracked so far (both allocated and freed).
    fn len(&self) -> usize {
        self.states.len()
    }

    /// Free a page on swap file.
    fn free(&mut self, idx_file: usize) {
        // Push the page onto the head of the free list.
        self.states[idx_file] = FilePageState::freed_state(self.first_idx_file_freed);
        self.first_idx_file_freed = Some(idx_file);
    }

    /// Allocates a file page on the swap file.
    ///
    /// This returns the index of the allocated file page.
    ///
    /// This reuses freed file pages first. If the free list is empty, this allocates new pages in
    /// the file.
    fn allocate(&mut self, idx_page: usize) -> usize {
        if let Some(idx_file_freed) = self.first_idx_file_freed {
            // TODO(kawasin): Collect consecutive freed pages in the free list to reduce number of
            // writes.
            let Some(next_idx_file_freed) = self.states[idx_file_freed].next_file_freed_idx()
            else {
                unreachable!("pages in free list must be freed pages")
            };
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx_page must be less than MAX_PAGE_IDX");
            };
            self.states[idx_file_freed] = state;
            // Pop the reused page off the free list.
            self.first_idx_file_freed = next_idx_file_freed;

            idx_file_freed
        } else {
            // The free list is empty. Allocate new pages.
            let head_idx_file = self.states.len();
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx must be less than MAX_PAGE_IDX");
            };
            self.states.push(state);
            head_idx_file
        }
    }

    /// Find the index range of file pages that are all present.
    ///
    /// This returns the pair of range of file page indexes and the index of the corresponding first
    /// page.
    ///
    /// Returns `None` if no pages after `idx_file` are present.
    ///
    /// # Arguments
    ///
    /// * `idx_file` - The first index to start searching from.
    /// * `page_states` - The page states
    /// * `max_pages` - The maximum number of pages to search.
    /// * `consecutive` - If true, the pages must have consecutive idx_page values.
    fn find_present_pages_range(
        &self,
        idx_file: usize,
        page_states: &[PageState],
        max_pages: usize,
        consecutive: bool,
    ) -> Option<(Range<usize>, usize)> {
        // Skip freed file pages and pages whose guest page is not present to find the head of
        // the range.
        let next_head_idx_offset = self.states[idx_file..].iter().position(|state| {
            !state.is_freed()
                && page_states[state
                    .idx_page()
                    .unwrap_or_else(|| unreachable!("the page is not freed"))]
                .is_present()
        });
        let Some(next_head_idx_offset) = next_head_idx_offset else {
            return None;
        };
        let idx_file = idx_file + next_head_idx_offset;

        let Some(head_idx_page) = self.states[idx_file].idx_page() else {
            unreachable!("the file page must not be freed");
        };

        let mut pages = 1;

        if max_pages > 1 {
            // Extend the range while the following file pages stay allocated, present, and
            // (when `consecutive`) map to consecutive guest page indices.
            for state in self.states[idx_file + 1..].iter() {
                if state.is_freed() {
                    break;
                } else {
                    let Some(idx_page) = state.idx_page() else {
                        unreachable!("allocated page must have idx_page");
                    };
                    if !page_states[idx_page].is_present()
                        || (consecutive && idx_page != head_idx_page + pages)
                    {
                        break;
                    }
                }

                pages += 1;
                if pages >= max_pages {
                    break;
                }
            }
        }

        Some((idx_file..idx_file + pages, head_idx_page))
    }
}
228 
/// u32 to pack the state of a guest memory page.
///
/// * If the page is not on the swap file, the value is zero.
/// * MSB: Whether the page is stale or not. (0: stale, 1: present).
/// * lower 31 bits: The corresponding file page index + 1. Never be zero.
#[derive(Clone, Debug)]
struct PageState(u32);

impl PageState {
    const IDX_FILE_MASK: u32 = (1 << 31) - 1;
    const PRESENT_BIT_MASK: u32 = 1 << 31;

    /// Whether the page has no corresponding file page at all.
    fn is_none(&self) -> bool {
        self.0 == 0
    }

    /// The corresponding file page index, if the page is on the file.
    fn idx_file(&self) -> Option<usize> {
        match self.0 {
            0 => None,
            // The packed value stores the file index + 1 so that zero can mean "no page".
            packed => Some((packed & Self::IDX_FILE_MASK) as usize - 1),
        }
    }

    /// Whether the content on the file is up-to-date for this page.
    fn is_present(&self) -> bool {
        self.0 & Self::PRESENT_BIT_MASK != 0
    }

    /// Points this page at file page `idx_file` and marks it present.
    fn update(&mut self, idx_file: usize) {
        self.0 = (idx_file as u32 + 1) | Self::PRESENT_BIT_MASK;
    }

    /// Sets the present bit while keeping the file page association.
    fn mark_as_present(&mut self) {
        self.0 |= Self::PRESENT_BIT_MASK;
    }

    /// Clears the present bit while keeping the file page association.
    fn clear(&mut self) {
        self.0 &= !Self::PRESENT_BIT_MASK;
    }

    /// Drops the file page association entirely.
    fn free(&mut self) {
        self.0 = 0;
    }
}
273 
/// [SwapFile] stores active pages in a memory region.
///
/// This shares the swap file with other regions and creates mmap corresponding range in the file.
///
/// TODO(kawasin): The file structure is straightforward and is not optimized yet.
/// Each page in the file corresponds to the page in the memory region.
#[derive(Debug)]
pub struct SwapFile<'a> {
    /// The underlying swap file, shared with other regions.
    file: &'a File,
    /// Read-only mapping of this region's range of the file; contents are written via pwrite(2)
    /// and read back through this mapping.
    file_mmap: MemoryMapping,
    /// State of each guest memory page, indexed by page index within the region.
    page_states: Vec<PageState>,
    /// Allocated/freed state of each page in the (compacted) file.
    file_states: FilePageStates,
    // All the data pages before this index are mlock(2)ed.
    cursor_mlock: usize,
    /// Lower bound of file page indices that may hold a present page; lets scans skip the prefix.
    min_possible_present_idx_file: usize,
}
290 
impl<'a> SwapFile<'a> {
    /// Creates an initialized [SwapFile] for a memory region.
    ///
    /// The all pages are marked as empty at first time.
    ///
    /// # Arguments
    ///
    /// * `file` - The swap file.
    /// * `num_of_pages` - The number of pages in the region.
    pub fn new(file: &'a File, num_of_pages: usize) -> Result<Self> {
        if num_of_pages > MAX_PAGE_IDX {
            return Err(Error::InvalidSize);
        }
        let file_mmap = MemoryMappingBuilder::new(pages_to_bytes(num_of_pages))
            .from_file(file)
            .protection(Protection::read())
            .build()
            .map_err(|e| Error::Mmap("create", e))?;
        Ok(Self {
            file,
            file_mmap,
            page_states: vec![PageState(0); num_of_pages],
            file_states: FilePageStates::new(num_of_pages),
            cursor_mlock: 0,
            min_possible_present_idx_file: 0,
        })
    }

    /// Returns a content of the page corresponding to the index if it is present.
    ///
    /// Returns [Option::None] if no content in the file.
    ///
    /// Returns [Error::OutOfRange] if the `idx` is out of range.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    /// * `allow_cleared` - if true, also return the content of a page that is on the file but not
    ///   marked present (i.e. cleared).
    pub fn page_content(
        &self,
        idx_page: usize,
        allow_cleared: bool,
    ) -> Result<Option<VolatileSlice>> {
        let state = self.page_states.get(idx_page).ok_or(Error::OutOfRange)?;
        if !state.is_none() && (allow_cleared || state.is_present()) {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none");
            };
            return match self
                .file_mmap
                .get_slice(pages_to_bytes(idx_file), pages_to_bytes(1))
            {
                Ok(slice) => Ok(Some(slice)),
                Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
                Err(e) => Err(e.into()),
            };
        }
        Ok(None)
    }

    /// Start readahead the swap file into the page cache from the head.
    ///
    /// This also `mlock2(2)` the pages not to be dropped again after populated. This does not block
    /// the caller thread by I/O wait because:
    ///
    /// * `mlock2(2)` is executed with `MLOCK_ONFAULT`.
    /// * `MADV_WILLNEED` is the same as `readahead(2)` which triggers the readahead background.
    ///   * However Linux has a bug that `readahead(2)` (and also `MADV_WILLNEED`) may block due to
    ///     reading the filesystem metadata.
    ///
    /// This returns the number of consecutive pages which are newly mlock(2)ed. Returning `0` means
    /// that there is no more data to be mlock(2)ed in this file.
    ///
    /// The caller must track the number of pages mlock(2)ed not to mlock(2) more pages than
    /// `RLIMIT_MEMLOCK` if it does not have `CAP_IPC_LOCK`.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - The maximum number of pages to be mlock(2)ed at once.
    pub fn lock_and_async_prefetch(&mut self, max_pages: usize) -> Result<usize> {
        if let Some((idx_file_range, _)) = self.file_states.find_present_pages_range(
            self.cursor_mlock,
            &self.page_states,
            max_pages,
            false,
        ) {
            let pages = idx_file_range.end - idx_file_range.start;
            let mem_offset = pages_to_bytes(idx_file_range.start);
            let size_in_bytes = pages_to_bytes(pages);
            self.file_mmap
                .lock_on_fault(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("mlock", e))?;
            self.file_mmap
                .async_prefetch(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("madvise willneed", e))?;
            self.cursor_mlock = idx_file_range.end;
            Ok(pages)
        } else {
            // No more present pages; move the cursor to the end so later calls return 0 quickly.
            self.cursor_mlock = self.file_states.len();
            Ok(0)
        }
    }

    /// Mark the pages in the file corresponding to the index as cleared.
    ///
    /// The contents on the swap file are preserved and will be reused by
    /// `SwapFile::mark_as_present()` and reduce disk I/O.
    ///
    /// If the pages are mlock(2)ed, unlock them before MADV_DONTNEED. This returns the number of
    /// pages munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be cleared. All the pages must be
    ///   present and consecutive in the compacted file.
    pub fn clear_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range.clone())?;

        for state in &mut self.page_states[idx_page_range] {
            state.clear();
        }

        let offset = pages_to_bytes(idx_file_range.start);
        let munlocked_size = if idx_file_range.start < self.cursor_mlock {
            // idx_page_range is validated at clear_range() and self.cursor_mlock is within the
            // mmap.
            let pages = idx_file_range.end.min(self.cursor_mlock) - idx_file_range.start;
            // munlock(2) first because MADV_DONTNEED fails for mlock(2)ed pages.
            self.file_mmap
                .unlock(offset, pages_to_bytes(pages))
                .map_err(|e| Error::Mmap("munlock", e))?;
            pages
        } else {
            0
        };
        // offset and size are validated at clear_range().
        let size = pages_to_bytes(idx_file_range.end - idx_file_range.start);
        // The page cache is cleared without writing pages back to file even if they are dirty.
        // The disk contents which may not be the latest are kept for later trim optimization.
        self.file_mmap
            .drop_page_cache(offset, size)
            .map_err(|e| Error::Mmap("madvise dontneed", e))?;
        Ok(munlocked_size)
    }

    /// Free the pages corresponding to the given range in the file.
    ///
    /// If the pages are mlock(2)ed, unlock them. This returns the number of pages munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be freed. This may contains
    ///   non-present pages.
    pub fn free_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        if idx_page_range.end > self.page_states.len() {
            return Err(Error::OutOfRange);
        }
        let mut mlocked_pages = 0;
        // Batch adjacent mlock(2)ed file pages into one munlock(2) call where possible.
        let mut mlock_range: Option<Range<usize>> = None;
        for state in &mut self.page_states[idx_page_range] {
            if !state.is_none() {
                let Some(idx_file) = state.idx_file() else {
                    unreachable!("the page is not none.");
                };
                self.file_states.free(idx_file);

                if idx_file < self.cursor_mlock && state.is_present() {
                    mlocked_pages += 1;
                    if let Some(range) = mlock_range.as_mut() {
                        if idx_file + 1 == range.start {
                            // Extend the pending range downwards.
                            range.start = idx_file;
                        } else if idx_file == range.end {
                            // Extend the pending range upwards.
                            range.end += 1;
                        } else {
                            // Not adjacent; flush the pending range and start a new one.
                            self.file_mmap
                                .unlock(
                                    pages_to_bytes(range.start),
                                    pages_to_bytes(range.end - range.start),
                                )
                                .map_err(|e| Error::Mmap("munlock", e))?;
                            mlock_range = Some(idx_file..idx_file + 1);
                        }
                    } else {
                        mlock_range = Some(idx_file..idx_file + 1);
                    }
                }
            }
            state.free();
        }
        // Flush the last pending munlock(2) range, if any.
        if let Some(mlock_range) = mlock_range {
            self.file_mmap
                .unlock(
                    pages_to_bytes(mlock_range.start),
                    pages_to_bytes(mlock_range.end - mlock_range.start),
                )
                .map_err(|e| Error::Mmap("munlock", e))?;
        }

        Ok(mlocked_pages)
    }

    /// munlock(2) pages if there are mlock(2)ed pages in the mmap and reset the internal cursor for
    /// mlock(2) tracking.
    pub fn clear_mlock(&mut self) -> Result<()> {
        if self.cursor_mlock > 0 {
            // cursor_mlock is not `0` only when disabling vmm-swap is aborted by overriding
            // vmm-swap enable. munlock(2)ing the whole possible pages is not a problem because this
            // is not a hot path.
            self.file_mmap
                .unlock(0, pages_to_bytes(self.cursor_mlock))
                .map_err(|e| Error::Mmap("munlock", e))?;
        }
        self.cursor_mlock = 0;
        Ok(())
    }

    /// Mark the page as present on the file.
    ///
    /// The content on the swap file on previous `SwapFile::write_to_file()` is reused.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    pub fn mark_as_present(&mut self, idx_page: usize) -> Result<()> {
        let state = self
            .page_states
            .get_mut(idx_page)
            .ok_or(Error::OutOfRange)?;
        if !state.is_none() && !state.is_present() {
            state.mark_as_present();
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none.");
            };
            // The newly present page may precede the current lower bound for scans.
            self.min_possible_present_idx_file =
                std::cmp::min(idx_file, self.min_possible_present_idx_file);
            Ok(())
        } else {
            Err(Error::InvalidIndex)
        }
    }

    /// Writes the contents to the swap file.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the head page of the content from the head of the pages.
    /// * `mem_slice` - the page content(s). this can be more than 1 page. the size must align with
    ///   the pagesize.
    pub fn write_to_file(&mut self, idx_page: usize, mem_slice: &[u8]) -> Result<()> {
        // validate
        if !is_page_aligned(mem_slice.len()) {
            // mem_slice size must align with page size.
            return Err(Error::InvalidSize);
        }
        let num_pages = bytes_to_pages(mem_slice.len());
        if idx_page + num_pages > self.page_states.len() {
            return Err(Error::OutOfRange);
        }

        // Setting 0 is faster than setting exact index by complex conditions.
        self.min_possible_present_idx_file = 0;

        // Allocate file pages (or mark existing ones present) for every page to be written.
        for cur in idx_page..idx_page + num_pages {
            let state = &mut self.page_states[cur];
            if state.is_none() {
                let idx_file = self.file_states.allocate(cur);
                state.update(idx_file);
            } else {
                state.mark_as_present();
            }
        }

        // Coalesce writes of pages that landed on consecutive file pages into single pwrite(2)s.
        let mut pending_idx_file = None;
        let mut pending_pages = 0;
        let mut mem_slice = mem_slice;
        for state in self.page_states[idx_page..idx_page + num_pages].iter() {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("pages must be allocated");
            };
            if let Some(pending_idx_file) = pending_idx_file {
                if idx_file == pending_idx_file + pending_pages {
                    pending_pages += 1;
                    continue;
                }
                let size = pages_to_bytes(pending_pages);
                // Write with pwrite(2) syscall instead of copying contents to mmap because write
                // syscall is more explicit for kernel how many pages are going to be written while
                // mmap only knows each page to be written on a page fault basis.
                self.file
                    .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
                mem_slice = &mem_slice[size..];
            }
            pending_idx_file = Some(idx_file);
            pending_pages = 1;
        }
        // Flush the final pending run.
        if let Some(pending_idx_file) = pending_idx_file {
            let size = pages_to_bytes(pending_pages);
            self.file
                .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
            mem_slice = &mem_slice[size..];
        }
        if !mem_slice.is_empty() {
            unreachable!("mem_slice must be all consumed");
        }

        Ok(())
    }

    /// Returns the first range of indices of consecutive pages present in the swap file.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - the max size of the returned chunk even if the chunk of consecutive present
    ///   pages is longer than this.
    pub fn first_data_range(&mut self, max_pages: usize) -> Option<Range<usize>> {
        if let Some((idx_file_range, head_idx_page)) = self.file_states.find_present_pages_range(
            self.min_possible_present_idx_file,
            &self.page_states,
            max_pages,
            true,
        ) {
            self.min_possible_present_idx_file = idx_file_range.start;
            let idx_page_range =
                head_idx_page..head_idx_page + idx_file_range.end - idx_file_range.start;
            Some(idx_page_range)
        } else {
            // No present pages remain; remember that so later scans can bail out early.
            self.min_possible_present_idx_file = self.file_states.len();
            None
        }
    }

    /// Returns the [VolatileSlice] corresponding to the indices regardless of whether the pages are
    /// present or not.
    ///
    /// If the range is out of the region, this returns [Error::OutOfRange].
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - the indices of the pages. All the pages must be present and consecutive
    ///   in the compacted file.
    pub fn get_slice(&self, idx_page_range: Range<usize>) -> Result<VolatileSlice> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range)?;
        match self.file_mmap.get_slice(
            pages_to_bytes(idx_file_range.start),
            pages_to_bytes(idx_file_range.end - idx_file_range.start),
        ) {
            Ok(slice) => Ok(slice),
            Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
            Err(e) => Err(e.into()),
        }
    }

    /// Returns the count of present pages in the swap file.
    pub fn present_pages(&self) -> usize {
        self.page_states
            .iter()
            .map(|state| state.is_present() as usize)
            .sum()
    }

    /// Convert the index range to corresponding index range of compacted file.
    ///
    /// This validates that the `idx_page_range` satisfy:
    ///
    /// * `idx_page_range` has corresponding page in the file.
    /// * corresponding index range in the file is consecutive.
    fn convert_idx_page_range_to_idx_file(
        &self,
        idx_page_range: Range<usize>,
    ) -> Result<Range<usize>> {
        // Validate that the idx_range is for consecutive present file pages.
        let state = self
            .page_states
            .get(idx_page_range.start)
            .ok_or(Error::OutOfRange)?;
        if state.is_none() || !state.is_present() {
            return Err(Error::InvalidIndex);
        }
        let Some(head_idx_file) = state.idx_file() else {
            unreachable!("the page is not none.");
        };
        let mut idx_file = head_idx_file;
        for idx in idx_page_range.start + 1..idx_page_range.end {
            let state = self.page_states.get(idx).ok_or(Error::OutOfRange)?;
            idx_file += 1;
            if state.is_none()
                || !state.is_present()
                || state
                    .idx_file()
                    .unwrap_or_else(|| unreachable!("the page is not none."))
                    != idx_file
            {
                return Err(Error::InvalidIndex);
            }
        }
        let idx_file_range =
            head_idx_file..head_idx_file + idx_page_range.end - idx_page_range.start;
        Ok(idx_file_range)
    }
}
690 
691 #[cfg(test)]
692 mod tests {
693     use std::slice;
694 
695     use base::pagesize;
696     use base::sys::FileDataIterator;
697 
698     use super::*;
699 
700     #[test]
new_success()701     fn new_success() {
702         let file = tempfile::tempfile().unwrap();
703 
704         assert_eq!(SwapFile::new(&file, 200).is_ok(), true);
705     }
706 
707     #[test]
len()708     fn len() {
709         let file = tempfile::tempfile().unwrap();
710         let swap_file = SwapFile::new(&file, 200).unwrap();
711 
712         assert_eq!(swap_file.page_states.len(), 200);
713     }
714 
715     #[test]
page_content_default_is_none()716     fn page_content_default_is_none() {
717         let file = tempfile::tempfile().unwrap();
718         let swap_file = SwapFile::new(&file, 200).unwrap();
719 
720         assert_eq!(swap_file.page_content(0, false).unwrap().is_none(), true);
721     }
722 
723     #[test]
page_content_returns_content()724     fn page_content_returns_content() {
725         let file = tempfile::tempfile().unwrap();
726         let mut swap_file = SwapFile::new(&file, 200).unwrap();
727 
728         let data = &vec![1; pagesize()];
729         swap_file.write_to_file(0, data).unwrap();
730 
731         let page = swap_file.page_content(0, false).unwrap().unwrap();
732         // TODO(b/315998194): Add safety comment
733         #[allow(clippy::undocumented_unsafe_blocks)]
734         let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
735         assert_eq!(result, data);
736     }
737 
738     #[test]
page_content_out_of_range()739     fn page_content_out_of_range() {
740         let file = tempfile::tempfile().unwrap();
741         let swap_file = SwapFile::new(&file, 200).unwrap();
742 
743         assert_eq!(swap_file.page_content(199, false).is_ok(), true);
744         match swap_file.page_content(200, false) {
745             Err(Error::OutOfRange) => {}
746             _ => unreachable!("not out of range"),
747         }
748     }
749 
assert_page_content(swap_file: &SwapFile, idx: usize, data: &[u8])750     fn assert_page_content(swap_file: &SwapFile, idx: usize, data: &[u8]) {
751         let page = swap_file.page_content(idx, false).unwrap().unwrap();
752         // TODO(b/315998194): Add safety comment
753         #[allow(clippy::undocumented_unsafe_blocks)]
754         let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
755         assert_eq!(result, data);
756     }
757 
758     #[test]
write_to_file_swap_file()759     fn write_to_file_swap_file() {
760         let file = tempfile::tempfile().unwrap();
761         let mut swap_file = SwapFile::new(&file, 200).unwrap();
762 
763         let buf1 = &vec![1; pagesize()];
764         let buf2 = &vec![2; 2 * pagesize()];
765         swap_file.write_to_file(0, buf1).unwrap();
766         swap_file.write_to_file(2, buf2).unwrap();
767 
768         // page_content()
769         assert_page_content(&swap_file, 0, buf1);
770         assert_page_content(&swap_file, 2, &buf2[0..pagesize()]);
771         assert_page_content(&swap_file, 3, &buf2[pagesize()..2 * pagesize()]);
772     }
773 
774     #[test]
write_to_file_invalid_size()775     fn write_to_file_invalid_size() {
776         let file = tempfile::tempfile().unwrap();
777         let mut swap_file = SwapFile::new(&file, 200).unwrap();
778 
779         let buf = &vec![1; pagesize() + 1];
780         match swap_file.write_to_file(0, buf) {
781             Err(Error::InvalidSize) => {}
782             _ => unreachable!("not invalid size"),
783         };
784     }
785 
786     #[test]
write_to_file_out_of_range()787     fn write_to_file_out_of_range() {
788         let file = tempfile::tempfile().unwrap();
789         let mut swap_file = SwapFile::new(&file, 200).unwrap();
790 
791         let buf1 = &vec![1; pagesize()];
792         let buf2 = &vec![2; 2 * pagesize()];
793         match swap_file.write_to_file(200, buf1) {
794             Err(Error::OutOfRange) => {}
795             _ => unreachable!("not out of range"),
796         };
797         match swap_file.write_to_file(199, buf2) {
798             Err(Error::OutOfRange) => {}
799             _ => unreachable!("not out of range"),
800         };
801     }
802 
803     #[test]
write_to_file_overwrite()804     fn write_to_file_overwrite() {
805         let file = tempfile::tempfile().unwrap();
806         let mut swap_file = SwapFile::new(&file, 200).unwrap();
807 
808         swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();
809         swap_file
810             .write_to_file(2, &vec![2; 2 * pagesize()])
811             .unwrap();
812 
813         let mut buf = vec![0; 3 * pagesize()];
814         buf[..pagesize()].fill(3);
815         buf[pagesize()..2 * pagesize()].fill(4);
816         buf[2 * pagesize()..3 * pagesize()].fill(5);
817         swap_file.write_to_file(0, &buf).unwrap();
818 
819         assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
820         assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
821         assert_page_content(&swap_file, 2, &vec![5; pagesize()]);
822         assert_page_content(&swap_file, 3, &vec![2; pagesize()]);
823         assert!(swap_file.page_content(4, false).unwrap().is_none());
824 
825         let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
826             .collect::<std::result::Result<Vec<_>, _>>();
827         assert_eq!(data, Ok(vec![0..4 * pagesize() as u64]));
828 
829         buf[..pagesize()].fill(6);
830         buf[pagesize()..2 * pagesize()].fill(7);
831         buf[2 * pagesize()..3 * pagesize()].fill(8);
832         swap_file.write_to_file(2, &buf).unwrap();
833         assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
834         assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
835         assert_page_content(&swap_file, 2, &vec![6; pagesize()]);
836         assert_page_content(&swap_file, 3, &vec![7; pagesize()]);
837         assert_page_content(&swap_file, 4, &vec![8; pagesize()]);
838         assert!(swap_file.page_content(5, false).unwrap().is_none());
839 
840         let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
841             .collect::<std::result::Result<Vec<_>, _>>();
842         assert_eq!(data, Ok(vec![0..5 * pagesize() as u64]));
843     }
844 
845     #[test]
846     #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
lock_and_start_populate()847     fn lock_and_start_populate() {
848         let file = tempfile::tempfile().unwrap();
849         let mut swap_file = SwapFile::new(&file, 200).unwrap();
850 
851         swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
852         swap_file
853             .write_to_file(3, &vec![1; 5 * pagesize()])
854             .unwrap();
855         swap_file.write_to_file(10, &vec![1; pagesize()]).unwrap();
856 
857         let mut locked_pages = 0;
858         loop {
859             let pages = swap_file.lock_and_async_prefetch(2).unwrap();
860             if pages == 0 {
861                 break;
862             }
863             assert!(pages <= 2);
864             locked_pages += pages;
865         }
866         assert_eq!(locked_pages, 7);
867     }
868 
869     #[test]
clear_range()870     fn clear_range() {
871         let file = tempfile::tempfile().unwrap();
872         let mut swap_file = SwapFile::new(&file, 200).unwrap();
873 
874         let data = &vec![1; pagesize()];
875         swap_file.write_to_file(0, data).unwrap();
876         swap_file.clear_range(0..1).unwrap();
877 
878         assert!(swap_file.page_content(0, false).unwrap().is_none());
879     }
880 
881     #[test]
882     #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
clear_range_unlocked_pages()883     fn clear_range_unlocked_pages() {
884         let file = tempfile::tempfile().unwrap();
885         let mut swap_file = SwapFile::new(&file, 200).unwrap();
886 
887         swap_file
888             .write_to_file(1, &vec![1; 10 * pagesize()])
889             .unwrap();
890         // 1..6 is locked, 6..11 is not locked.
891         assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);
892 
893         // locked pages only
894         assert_eq!(swap_file.clear_range(1..4).unwrap(), 3);
895         // locked pages + non-locked pages
896         assert_eq!(swap_file.clear_range(4..7).unwrap(), 2);
897         // non-locked pages
898         assert_eq!(swap_file.clear_range(10..11).unwrap(), 0);
899     }
900 
901     #[test]
clear_range_keep_on_disk()902     fn clear_range_keep_on_disk() {
903         let file = tempfile::tempfile().unwrap();
904         let mut swap_file = SwapFile::new(&file, 200).unwrap();
905 
906         let data = &vec![1; pagesize()];
907         swap_file.write_to_file(0, data).unwrap();
908         swap_file.clear_range(0..1).unwrap();
909 
910         let slice = swap_file.page_content(0, true).unwrap().unwrap();
911         // TODO(b/315998194): Add safety comment
912         #[allow(clippy::undocumented_unsafe_blocks)]
913         let slice = unsafe { slice::from_raw_parts(slice.as_ptr(), slice.size()) };
914         assert_eq!(slice, data);
915     }
916 
917     #[test]
clear_range_out_of_range()918     fn clear_range_out_of_range() {
919         let file = tempfile::tempfile().unwrap();
920         let mut swap_file = SwapFile::new(&file, 200).unwrap();
921         swap_file.write_to_file(199, &vec![0; pagesize()]).unwrap();
922 
923         match swap_file.clear_range(199..201) {
924             Err(Error::OutOfRange) => {}
925             _ => unreachable!("not out of range"),
926         };
927         assert!(swap_file.clear_range(199..200).is_ok());
928         match swap_file.clear_range(200..201) {
929             Err(Error::OutOfRange) => {}
930             _ => unreachable!("not out of range"),
931         };
932     }
933 
934     #[test]
free_range()935     fn free_range() {
936         let file = tempfile::tempfile().unwrap();
937         let mut swap_file = SwapFile::new(&file, 200).unwrap();
938 
939         let data = &vec![1; pagesize()];
940         swap_file.write_to_file(0, data).unwrap();
941         swap_file.free_range(0..1).unwrap();
942 
943         assert!(swap_file.page_content(0, false).unwrap().is_none());
944         assert!(swap_file.page_content(0, true).unwrap().is_none());
945     }
946 
947     #[test]
948     #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
free_range_unlocked_pages()949     fn free_range_unlocked_pages() {
950         let file = tempfile::tempfile().unwrap();
951         let mut swap_file = SwapFile::new(&file, 200).unwrap();
952 
953         swap_file
954             .write_to_file(1, &vec![1; 10 * pagesize()])
955             .unwrap();
956         // 1..6 is locked, 6..11 is not locked.
957         assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);
958 
959         // empty pages
960         assert_eq!(swap_file.free_range(0..1).unwrap(), 0);
961         // empty pages + locked pages
962         assert_eq!(swap_file.free_range(0..2).unwrap(), 1);
963         // locked pages only
964         assert_eq!(swap_file.free_range(2..4).unwrap(), 2);
965         // empty pages + locked pages + non-locked pages
966         assert_eq!(swap_file.free_range(3..7).unwrap(), 2);
967         // non-locked pages
968         assert_eq!(swap_file.free_range(10..11).unwrap(), 0);
969     }
970 
971     #[test]
free_range_out_of_range()972     fn free_range_out_of_range() {
973         let file = tempfile::tempfile().unwrap();
974         let mut swap_file = SwapFile::new(&file, 200).unwrap();
975 
976         assert_eq!(swap_file.free_range(199..200).is_ok(), true);
977         match swap_file.free_range(200..201) {
978             Err(Error::OutOfRange) => {}
979             _ => unreachable!("not out of range"),
980         };
981         match swap_file.free_range(199..201) {
982             Err(Error::OutOfRange) => {}
983             _ => unreachable!("not out of range"),
984         };
985     }
986 
987     #[test]
free_range_and_write()988     fn free_range_and_write() {
989         let file = tempfile::tempfile().unwrap();
990         let mut swap_file = SwapFile::new(&file, 200).unwrap();
991 
992         let data = &vec![1; 5 * pagesize()];
993         swap_file.write_to_file(0, data).unwrap();
994         swap_file.free_range(0..5).unwrap();
995 
996         swap_file
997             .write_to_file(0, &vec![2; 2 * pagesize()])
998             .unwrap();
999         swap_file
1000             .write_to_file(5, &vec![3; 4 * pagesize()])
1001             .unwrap();
1002 
1003         assert_page_content(&swap_file, 0, &vec![2; pagesize()]);
1004         assert_page_content(&swap_file, 1, &vec![2; pagesize()]);
1005         assert!(swap_file.page_content(2, true).unwrap().is_none());
1006         assert!(swap_file.page_content(3, true).unwrap().is_none());
1007         assert!(swap_file.page_content(4, true).unwrap().is_none());
1008         assert_page_content(&swap_file, 5, &vec![3; pagesize()]);
1009         assert_page_content(&swap_file, 6, &vec![3; pagesize()]);
1010         assert_page_content(&swap_file, 7, &vec![3; pagesize()]);
1011         assert_page_content(&swap_file, 8, &vec![3; pagesize()]);
1012         assert!(swap_file.page_content(9, true).unwrap().is_none());
1013 
1014         let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
1015             .collect::<std::result::Result<Vec<_>, _>>();
1016         assert_eq!(data, Ok(vec![0..6 * pagesize() as u64]));
1017     }
1018 
1019     #[test]
1020     #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
clear_mlock()1021     fn clear_mlock() {
1022         let file = tempfile::tempfile().unwrap();
1023         let mut swap_file = SwapFile::new(&file, 200).unwrap();
1024 
1025         swap_file
1026             .write_to_file(1, &vec![1; 10 * pagesize()])
1027             .unwrap();
1028         // success if there is no mlock.
1029         assert!(swap_file.clear_mlock().is_ok());
1030 
1031         assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
1032         // success if there is mlocked area.
1033         assert!(swap_file.clear_mlock().is_ok());
1034 
1035         // mlock area is cleared.
1036         assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
1037     }
1038 
1039     #[test]
first_data_range()1040     fn first_data_range() {
1041         let file = tempfile::tempfile().unwrap();
1042         let mut swap_file = SwapFile::new(&file, 200).unwrap();
1043 
1044         swap_file
1045             .write_to_file(1, &vec![1; 2 * pagesize()])
1046             .unwrap();
1047         swap_file.write_to_file(3, &vec![2; pagesize()]).unwrap();
1048 
1049         assert_eq!(swap_file.first_data_range(200).unwrap(), 1..4);
1050         assert_eq!(swap_file.first_data_range(2).unwrap(), 1..3);
1051         assert_eq!(swap_file.first_data_range(1).unwrap(), 1..2);
1052         swap_file.clear_range(1..3).unwrap();
1053         assert_eq!(swap_file.first_data_range(2).unwrap(), 3..4);
1054         swap_file.clear_range(3..4).unwrap();
1055         assert!(swap_file.first_data_range(2).is_none());
1056     }
1057 
1058     #[test]
get_slice()1059     fn get_slice() {
1060         let file = tempfile::tempfile().unwrap();
1061         let mut swap_file = SwapFile::new(&file, 200).unwrap();
1062 
1063         swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
1064         swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();
1065 
1066         let slice = swap_file.get_slice(1..3).unwrap();
1067         assert_eq!(slice.size(), 2 * pagesize());
1068         let mut buf = vec![0u8; pagesize()];
1069         slice.get_slice(0, pagesize()).unwrap().copy_to(&mut buf);
1070         assert_eq!(buf, vec![1; pagesize()]);
1071 
1072         let mut buf = vec![0u8; pagesize()];
1073         slice
1074             .get_slice(pagesize(), pagesize())
1075             .unwrap()
1076             .copy_to(&mut buf);
1077         assert_eq!(buf, vec![2; pagesize()]);
1078     }
1079 
1080     #[test]
get_slice_out_of_range()1081     fn get_slice_out_of_range() {
1082         let file = tempfile::tempfile().unwrap();
1083         let swap_file = SwapFile::new(&file, 200).unwrap();
1084 
1085         match swap_file.get_slice(200..201) {
1086             Err(Error::OutOfRange) => {}
1087             other => {
1088                 unreachable!("unexpected result {:?}", other);
1089             }
1090         }
1091     }
1092 
1093     #[test]
present_pages()1094     fn present_pages() {
1095         let file = tempfile::tempfile().unwrap();
1096         let mut swap_file = SwapFile::new(&file, 200).unwrap();
1097 
1098         swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
1099         swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();
1100 
1101         assert_eq!(swap_file.present_pages(), 2);
1102     }
1103 }
1104