// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#![deny(missing_docs)]

use std::fs::File;
use std::ops::Range;
use std::os::unix::fs::FileExt;

use base::error;
use base::linux::MemoryMappingUnix;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::VolatileMemory;
use base::VolatileMemoryError;
use base::VolatileSlice;
use thiserror::Error as ThisError;

use crate::pagesize::bytes_to_pages;
use crate::pagesize::is_page_aligned;
use crate::pagesize::pages_to_bytes;

pub type Result<T> = std::result::Result<T, Error>;

// On a 4KB page size system, guest memory must be less than 8 TiB, which is a reasonable
// assumption.
const MAX_PAGE_IDX: usize = (1 << 31) - 2;

#[derive(ThisError, Debug)]
pub enum Error {
    #[error("io failed: {0}")]
    Io(#[from] std::io::Error),
    #[error("mmap operation ({0}) failed: {1}")]
    Mmap(&'static str, MmapError),
    #[error("volatile memory operation failed: {0}")]
    VolatileMemory(#[from] VolatileMemoryError),
    #[error("index is out of range")]
    OutOfRange,
    #[error("data size is invalid")]
    InvalidSize,
    #[error("index is invalid")]
    InvalidIndex,
}

/// u32 to pack the state of a page on the file.
///
/// * MSB: Whether the page on the file is freed. (1: freed, 0: allocated)
/// * lower 31 bits:
///   * The corresponding page index if the file page is allocated.
///   * The file page index + 1 of the next freed file page if the file page is freed. Zero means
///     it is the last page in the free list.
#[derive(Debug)]
struct FilePageState(u32);

impl FilePageState {
    const FREED_BIT_MASK: u32 = 1 << 31;

    fn freed_state(first_freed_page: Option<usize>) -> Self {
        Self(
            Self::FREED_BIT_MASK
                | first_freed_page
                    .map(|idx_file| idx_file as u32 + 1)
                    .unwrap_or(0),
        )
    }

    fn allocated_state(idx_page: usize) -> Option<Self> {
        if idx_page <= MAX_PAGE_IDX {
            Some(Self(idx_page as u32))
        } else {
            // idx_page is invalid.
            None
        }
    }

    fn is_freed(&self) -> bool {
        self.0 & Self::FREED_BIT_MASK != 0
    }

    /// This is valid only if the page is freed.
    fn next_file_freed_idx(&self) -> Option<Option<usize>> {
        if self.is_freed() {
            let next_idx_file = !Self::FREED_BIT_MASK & self.0;
            if next_idx_file == 0 {
                Some(None)
            } else {
                Some(Some(next_idx_file as usize - 1))
            }
        } else {
            None
        }
    }

    /// This is valid only if the page is allocated.
    fn idx_page(&self) -> Option<usize> {
        if self.is_freed() {
            // The file page is freed.
            None
        } else {
            Some(self.0 as usize)
        }
    }
}
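// A worked example of the packing above (an editor-added sketch, not from the original source):
//
// * `FilePageState::allocated_state(5)` -> `Some(FilePageState(0x0000_0005))`, and `idx_page()`
//   returns `Some(5)`.
// * `FilePageState::freed_state(Some(2))` -> `FilePageState(0x8000_0003)`, and
//   `next_file_freed_idx()` returns `Some(Some(2))`.
// * `FilePageState::freed_state(None)` -> `FilePageState(0x8000_0000)`, and
//   `next_file_freed_idx()` returns `Some(None)`: the end of the free list.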
#[derive(Debug)]
struct FilePageStates {
    /// Freed pages on the swap file are managed in a free list. `first_idx_file_freed` points to
    /// the first page index in the list.
    first_idx_file_freed: Option<usize>,
    states: Vec<FilePageState>,
}

impl FilePageStates {
    fn new(capacity: usize) -> Self {
        FilePageStates {
            first_idx_file_freed: None,
            states: Vec::with_capacity(capacity),
        }
    }

    fn len(&self) -> usize {
        self.states.len()
    }

    /// Frees a page on the swap file.
    fn free(&mut self, idx_file: usize) {
        self.states[idx_file] = FilePageState::freed_state(self.first_idx_file_freed);
        self.first_idx_file_freed = Some(idx_file);
    }

    /// Allocates a file page on the swap file.
    ///
    /// This returns the index of the allocated file page.
    ///
    /// This reuses freed file pages first. If the free list is empty, this allocates new pages in
    /// the file.
    fn allocate(&mut self, idx_page: usize) -> usize {
        if let Some(idx_file_freed) = self.first_idx_file_freed {
            // TODO(kawasin): Collect consecutive freed pages in the free list to reduce number of
            // writes.
            let Some(next_idx_file_freed) = self.states[idx_file_freed].next_file_freed_idx()
            else {
                unreachable!("pages in free list must be freed pages")
            };
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx_page must be less than MAX_PAGE_IDX");
            };
            self.states[idx_file_freed] = state;
            self.first_idx_file_freed = next_idx_file_freed;

            idx_file_freed
        } else {
            // The free list is empty. Allocate new pages.
            let head_idx_file = self.states.len();
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx_page must be less than MAX_PAGE_IDX");
            };
            self.states.push(state);
            head_idx_file
        }
    }
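    // A worked example of the free list (an editor-added sketch, not from the original source).
    // Starting from three allocated file pages and an empty free list:
    //
    //   free(1)     => states[1] = freed_state(None),    first_idx_file_freed = Some(1)
    //   free(0)     => states[0] = freed_state(Some(1)), first_idx_file_freed = Some(0)
    //   allocate(9) => returns 0: states[0] = allocated_state(9),
    //                  first_idx_file_freed = Some(1)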
    /// Finds the index range of file pages that are all present.
    ///
    /// This returns the pair of the range of file page indices and the index of the corresponding
    /// first page.
    ///
    /// Returns `None` if no pages after `idx_file` are present.
    ///
    /// # Arguments
    ///
    /// * `idx_file` - The first index to start searching from.
    /// * `page_states` - The page states.
    /// * `max_pages` - The maximum number of pages to search.
    /// * `consecutive` - If true, the pages must have consecutive `idx_page` values.
    fn find_present_pages_range(
        &self,
        idx_file: usize,
        page_states: &[PageState],
        max_pages: usize,
        consecutive: bool,
    ) -> Option<(Range<usize>, usize)> {
        let next_head_idx_offset = self.states[idx_file..].iter().position(|state| {
            !state.is_freed()
                && page_states[state
                    .idx_page()
                    .unwrap_or_else(|| unreachable!("the page is not freed"))]
                .is_present()
        });
        let Some(next_head_idx_offset) = next_head_idx_offset else {
            return None;
        };
        let idx_file = idx_file + next_head_idx_offset;

        let Some(head_idx_page) = self.states[idx_file].idx_page() else {
            unreachable!("the file page must not be freed");
        };

        let mut pages = 1;

        if max_pages > 1 {
            for state in self.states[idx_file + 1..].iter() {
                if state.is_freed() {
                    break;
                } else {
                    let Some(idx_page) = state.idx_page() else {
                        unreachable!("allocated page must have idx_page");
                    };
                    if !page_states[idx_page].is_present()
                        || (consecutive && idx_page != head_idx_page + pages)
                    {
                        break;
                    }
                }

                pages += 1;
                if pages >= max_pages {
                    break;
                }
            }
        }

        Some((idx_file..idx_file + pages, head_idx_page))
    }
}

/// u32 to pack the state of a guest memory page.
///
/// * If the page is not on the swap file, the value is zero.
/// * MSB: Whether the page is present or stale. (0: stale, 1: present)
/// * lower 31 bits: The corresponding file page index + 1. Never zero.
#[derive(Clone, Debug)]
struct PageState(u32);

impl PageState {
    const IDX_FILE_MASK: u32 = (1 << 31) - 1;
    const PRESENT_BIT_MASK: u32 = 1 << 31;

    fn is_none(&self) -> bool {
        self.0 == 0
    }

    fn idx_file(&self) -> Option<usize> {
        if self.0 != 0 {
            Some((self.0 & Self::IDX_FILE_MASK) as usize - 1)
        } else {
            None
        }
    }

    fn is_present(&self) -> bool {
        self.0 & Self::PRESENT_BIT_MASK != 0
    }

    fn update(&mut self, idx_file: usize) {
        self.0 = (idx_file as u32 + 1) | Self::PRESENT_BIT_MASK;
    }

    fn mark_as_present(&mut self) {
        self.0 |= Self::PRESENT_BIT_MASK;
    }

    fn clear(&mut self) {
        self.0 &= !Self::PRESENT_BIT_MASK;
    }

    fn free(&mut self) {
        self.0 = 0;
    }
}
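// A worked example of the packing above (an editor-added sketch, not from the original source):
//
// * `PageState(0)`: no content on the swap file (`is_none()`).
// * `update(4)` -> `PageState(0x8000_0005)`: present, `idx_file()` is `Some(4)`.
// * `clear()` -> `PageState(0x0000_0005)`: stale, but still points to file page 4.
// * `mark_as_present()` -> `PageState(0x8000_0005)`: present again without disk I/O.
// * `free()` -> `PageState(0)`: no content on the swap file.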
/// [SwapFile] stores active pages in a memory region.
///
/// This shares the swap file with other regions and mmaps the corresponding range in the file.
///
/// TODO(kawasin): The file structure is straightforward and is not optimized yet.
/// Each page in the file corresponds to the page in the memory region.
#[derive(Debug)]
pub struct SwapFile<'a> {
    file: &'a File,
    file_mmap: MemoryMapping,
    page_states: Vec<PageState>,
    file_states: FilePageStates,
    // All the data pages before this index are mlock(2)ed.
    cursor_mlock: usize,
    min_possible_present_idx_file: usize,
}

impl<'a> SwapFile<'a> {
    /// Creates an initialized [SwapFile] for a memory region.
    ///
    /// All the pages are marked as empty at first.
    ///
    /// # Arguments
    ///
    /// * `file` - The swap file.
    /// * `num_of_pages` - The number of pages in the region.
    pub fn new(file: &'a File, num_of_pages: usize) -> Result<Self> {
        if num_of_pages > MAX_PAGE_IDX {
            return Err(Error::InvalidSize);
        }
        let file_mmap = MemoryMappingBuilder::new(pages_to_bytes(num_of_pages))
            .from_file(file)
            .protection(Protection::read())
            .build()
            .map_err(|e| Error::Mmap("create", e))?;
        Ok(Self {
            file,
            file_mmap,
            page_states: vec![PageState(0); num_of_pages],
            file_states: FilePageStates::new(num_of_pages),
            cursor_mlock: 0,
            min_possible_present_idx_file: 0,
        })
    }

    /// Returns the content of the page corresponding to the index if it is present.
    ///
    /// Returns [Option::None] if there is no content in the file.
    ///
    /// Returns [Error::OutOfRange] if the `idx_page` is out of range.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    pub fn page_content(
        &self,
        idx_page: usize,
        allow_cleared: bool,
    ) -> Result<Option<VolatileSlice>> {
        let state = self.page_states.get(idx_page).ok_or(Error::OutOfRange)?;
        if !state.is_none() && (allow_cleared || state.is_present()) {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none");
            };
            return match self
                .file_mmap
                .get_slice(pages_to_bytes(idx_file), pages_to_bytes(1))
            {
                Ok(slice) => Ok(Some(slice)),
                Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
                Err(e) => Err(e.into()),
            };
        }
        Ok(None)
    }
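    // A plausible caller loop for the prefetch below (an editor-added sketch which mirrors the
    // `lock_and_start_populate` unit test; `max_pages_per_call` and the budget accounting are
    // illustrative, not from the original source):
    //
    //   loop {
    //       let pages = swap_file.lock_and_async_prefetch(max_pages_per_call)?;
    //       if pages == 0 {
    //           break; // all present pages are mlock(2)ed.
    //       }
    //       // Account `pages` against the caller's RLIMIT_MEMLOCK budget here.
    //   }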
    /// Starts readahead of the swap file into the page cache from the head.
    ///
    /// This also mlock2(2)s the pages so that they are not dropped again after being populated.
    /// This does not block the caller thread by I/O wait because:
    ///
    /// * `mlock2(2)` is executed with `MLOCK_ONFAULT`.
    /// * `MADV_WILLNEED` is the same as `readahead(2)` which triggers readahead in the background.
    /// * However, Linux has a bug that `readahead(2)` (and also `MADV_WILLNEED`) may block due to
    ///   reading the filesystem metadata.
    ///
    /// This returns the number of consecutive pages which are newly mlock(2)ed. Returning `0`
    /// means that there is no more data to be mlock(2)ed in this file.
    ///
    /// The caller must track the number of mlock(2)ed pages so as not to mlock(2) more pages than
    /// `RLIMIT_MEMLOCK` if it does not have `CAP_IPC_LOCK`.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - The maximum number of pages to be mlock(2)ed at once.
    pub fn lock_and_async_prefetch(&mut self, max_pages: usize) -> Result<usize> {
        if let Some((idx_file_range, _)) = self.file_states.find_present_pages_range(
            self.cursor_mlock,
            &self.page_states,
            max_pages,
            false,
        ) {
            let pages = idx_file_range.end - idx_file_range.start;
            let mem_offset = pages_to_bytes(idx_file_range.start);
            let size_in_bytes = pages_to_bytes(pages);
            self.file_mmap
                .lock_on_fault(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("mlock", e))?;
            self.file_mmap
                .async_prefetch(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("madvise willneed", e))?;
            self.cursor_mlock = idx_file_range.end;
            Ok(pages)
        } else {
            self.cursor_mlock = self.file_states.len();
            Ok(0)
        }
    }

    /// Marks the pages in the file corresponding to the index as cleared.
    ///
    /// The contents on the swap file are preserved and will be reused by
    /// `SwapFile::mark_as_present()`, reducing disk I/O.
    ///
    /// If the pages are mlock(2)ed, this unlocks them before `MADV_DONTNEED` and returns the
    /// number of munlock(2)ed pages.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be cleared. All the pages must be
    ///   present and consecutive in the compacted file.
    pub fn clear_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range.clone())?;

        for state in &mut self.page_states[idx_page_range] {
            state.clear();
        }

        let offset = pages_to_bytes(idx_file_range.start);
        let munlocked_size = if idx_file_range.start < self.cursor_mlock {
            // idx_file_range is validated at convert_idx_page_range_to_idx_file() and
            // self.cursor_mlock is within the mmap.
            let pages = idx_file_range.end.min(self.cursor_mlock) - idx_file_range.start;
            // munlock(2) first because MADV_DONTNEED fails for mlock(2)ed pages.
            self.file_mmap
                .unlock(offset, pages_to_bytes(pages))
                .map_err(|e| Error::Mmap("munlock", e))?;
            pages
        } else {
            0
        };
        // offset and size are validated at convert_idx_page_range_to_idx_file().
        let size = pages_to_bytes(idx_file_range.end - idx_file_range.start);
        // The page cache is cleared without writing pages back to file even if they are dirty.
        // The disk contents which may not be the latest are kept for later trim optimization.
        self.file_mmap
            .drop_page_cache(offset, size)
            .map_err(|e| Error::Mmap("madvise dontneed", e))?;
        Ok(munlocked_size)
    }
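    // Editor-added sketch contrasting clear_range() above with free_range() below:
    //
    //   swap_file.write_to_file(0, page)?; // page 0 becomes present on the file.
    //   swap_file.clear_range(0..1)?;      // page cache dropped; content kept on disk.
    //   swap_file.mark_as_present(0)?;     // reuses the stale content without disk I/O.
    //   swap_file.free_range(0..1)?;       // the file page returns to the free list.
    //
    // (`page` is an illustrative page-sized buffer, not from the original source.)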
    /// Frees the pages corresponding to the given range in the file.
    ///
    /// If the pages are mlock(2)ed, this unlocks them and returns the number of munlock(2)ed
    /// pages.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be freed. This may contain
    ///   non-present pages.
    pub fn free_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        if idx_page_range.end > self.page_states.len() {
            return Err(Error::OutOfRange);
        }
        let mut mlocked_pages = 0;
        let mut mlock_range: Option<Range<usize>> = None;
        for state in &mut self.page_states[idx_page_range] {
            if !state.is_none() {
                let Some(idx_file) = state.idx_file() else {
                    unreachable!("the page is not none.");
                };
                self.file_states.free(idx_file);

                if idx_file < self.cursor_mlock && state.is_present() {
                    mlocked_pages += 1;
                    if let Some(range) = mlock_range.as_mut() {
                        if idx_file + 1 == range.start {
                            range.start = idx_file;
                        } else if idx_file == range.end {
                            range.end += 1;
                        } else {
                            self.file_mmap
                                .unlock(
                                    pages_to_bytes(range.start),
                                    pages_to_bytes(range.end - range.start),
                                )
                                .map_err(|e| Error::Mmap("munlock", e))?;
                            mlock_range = Some(idx_file..idx_file + 1);
                        }
                    } else {
                        mlock_range = Some(idx_file..idx_file + 1);
                    }
                }
            }
            state.free();
        }
        if let Some(mlock_range) = mlock_range {
            self.file_mmap
                .unlock(
                    pages_to_bytes(mlock_range.start),
                    pages_to_bytes(mlock_range.end - mlock_range.start),
                )
                .map_err(|e| Error::Mmap("munlock", e))?;
        }

        Ok(mlocked_pages)
    }

    /// munlock(2)s the pages if there are mlock(2)ed pages in the mmap and resets the internal
    /// cursor for mlock(2) tracking.
    pub fn clear_mlock(&mut self) -> Result<()> {
        if self.cursor_mlock > 0 {
            // cursor_mlock is not `0` only when disabling vmm-swap is aborted by overriding
            // vmm-swap enable. munlock(2)ing the whole possible pages is not a problem because
            // this is not a hot path.
            self.file_mmap
                .unlock(0, pages_to_bytes(self.cursor_mlock))
                .map_err(|e| Error::Mmap("munlock", e))?;
        }
        self.cursor_mlock = 0;
        Ok(())
    }

    /// Marks the page as present on the file.
    ///
    /// The content written to the swap file by a previous `SwapFile::write_to_file()` is reused.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    pub fn mark_as_present(&mut self, idx_page: usize) -> Result<()> {
        let state = self
            .page_states
            .get_mut(idx_page)
            .ok_or(Error::OutOfRange)?;
        if !state.is_none() && !state.is_present() {
            state.mark_as_present();
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none.");
            };
            self.min_possible_present_idx_file =
                std::cmp::min(idx_file, self.min_possible_present_idx_file);
            Ok(())
        } else {
            Err(Error::InvalidIndex)
        }
    }
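    // Editor-added sketch of the pwrite(2) batching in write_to_file() below, assuming the free
    // list hands out file pages 5, 6, 2, 3 in that order for guest pages 0..4:
    //
    //   guest pages 0..2 -> file pages 5..7: one write_all_at() at the offset of file page 5.
    //   guest pages 2..4 -> file pages 2..4: one write_all_at() at the offset of file page 2.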
    /// Writes the contents to the swap file.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the head page of the content from the head of the pages.
    /// * `mem_slice` - the page content(s). This can be more than 1 page. The size must be
    ///   aligned with the page size.
    pub fn write_to_file(&mut self, idx_page: usize, mem_slice: &[u8]) -> Result<()> {
        // validate
        if !is_page_aligned(mem_slice.len()) {
            // mem_slice size must be aligned with the page size.
            return Err(Error::InvalidSize);
        }
        let num_pages = bytes_to_pages(mem_slice.len());
        if idx_page + num_pages > self.page_states.len() {
            return Err(Error::OutOfRange);
        }

        // Setting 0 is faster than setting the exact index by complex conditions.
        self.min_possible_present_idx_file = 0;

        for cur in idx_page..idx_page + num_pages {
            let state = &mut self.page_states[cur];
            if state.is_none() {
                let idx_file = self.file_states.allocate(cur);
                state.update(idx_file);
            } else {
                state.mark_as_present();
            }
        }

        let mut pending_idx_file = None;
        let mut pending_pages = 0;
        let mut mem_slice = mem_slice;
        for state in self.page_states[idx_page..idx_page + num_pages].iter() {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("pages must be allocated");
            };
            if let Some(pending_idx_file) = pending_idx_file {
                if idx_file == pending_idx_file + pending_pages {
                    pending_pages += 1;
                    continue;
                }
                let size = pages_to_bytes(pending_pages);
                // Write with the pwrite(2) syscall instead of copying contents to the mmap,
                // because the write syscall makes it explicit to the kernel how many pages are
                // going to be written, while the mmap learns of each page to be written only on a
                // page fault basis.
                self.file
                    .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
                mem_slice = &mem_slice[size..];
            }
            pending_idx_file = Some(idx_file);
            pending_pages = 1;
        }
        if let Some(pending_idx_file) = pending_idx_file {
            let size = pages_to_bytes(pending_pages);
            self.file
                .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
            mem_slice = &mem_slice[size..];
        }
        if !mem_slice.is_empty() {
            unreachable!("mem_slice must be all consumed");
        }

        Ok(())
    }

    /// Returns the first range of indices of consecutive pages present in the swap file.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - the max size of the returned chunk even if the chunk of consecutive
    ///   present pages is longer than this.
    pub fn first_data_range(&mut self, max_pages: usize) -> Option<Range<usize>> {
        if let Some((idx_file_range, head_idx_page)) = self.file_states.find_present_pages_range(
            self.min_possible_present_idx_file,
            &self.page_states,
            max_pages,
            true,
        ) {
            self.min_possible_present_idx_file = idx_file_range.start;
            let idx_page_range =
                head_idx_page..head_idx_page + idx_file_range.end - idx_file_range.start;
            Some(idx_page_range)
        } else {
            self.min_possible_present_idx_file = self.file_states.len();
            None
        }
    }
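    // A plausible drain loop over the present pages (an editor-added sketch; `max_pages` and the
    // consumer are illustrative, not from the original source):
    //
    //   while let Some(idx_page_range) = swap_file.first_data_range(max_pages) {
    //       let slice = swap_file.get_slice(idx_page_range.clone())?;
    //       // ... consume `slice`, e.g. copy it back into the memory region ...
    //       swap_file.clear_range(idx_page_range)?;
    //   }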
    /// Returns the [VolatileSlice] corresponding to the indices.
    ///
    /// If the range is out of the region, this returns [Error::OutOfRange].
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - the indices of the pages. All the pages must be present and
    ///   consecutive in the compacted file.
    pub fn get_slice(&self, idx_page_range: Range<usize>) -> Result<VolatileSlice> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range)?;
        match self.file_mmap.get_slice(
            pages_to_bytes(idx_file_range.start),
            pages_to_bytes(idx_file_range.end - idx_file_range.start),
        ) {
            Ok(slice) => Ok(slice),
            Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
            Err(e) => Err(e.into()),
        }
    }

    /// Returns the count of present pages in the swap file.
    pub fn present_pages(&self) -> usize {
        self.page_states
            .iter()
            .map(|state| state.is_present() as usize)
            .sum()
    }

    /// Converts the index range to the corresponding index range of the compacted file.
    ///
    /// This validates that the `idx_page_range` satisfies:
    ///
    /// * `idx_page_range` has a corresponding page in the file.
    /// * The corresponding index range in the file is consecutive.
    fn convert_idx_page_range_to_idx_file(
        &self,
        idx_page_range: Range<usize>,
    ) -> Result<Range<usize>> {
        // Validate that the idx_range is for consecutive present file pages.
        let state = self
            .page_states
            .get(idx_page_range.start)
            .ok_or(Error::OutOfRange)?;
        if state.is_none() || !state.is_present() {
            return Err(Error::InvalidIndex);
        }
        let Some(head_idx_file) = state.idx_file() else {
            unreachable!("the page is not none.");
        };
        let mut idx_file = head_idx_file;
        for idx in idx_page_range.start + 1..idx_page_range.end {
            let state = self.page_states.get(idx).ok_or(Error::OutOfRange)?;
            idx_file += 1;
            if state.is_none()
                || !state.is_present()
                || state
                    .idx_file()
                    .unwrap_or_else(|| unreachable!("the page is not none."))
                    != idx_file
            {
                return Err(Error::InvalidIndex);
            }
        }
        let idx_file_range =
            head_idx_file..head_idx_file + idx_page_range.end - idx_page_range.start;
        Ok(idx_file_range)
    }
}

#[cfg(test)]
mod tests {
    use std::slice;

    use base::pagesize;
    use base::sys::FileDataIterator;

    use super::*;

    #[test]
    fn new_success() {
        let file = tempfile::tempfile().unwrap();

        assert!(SwapFile::new(&file, 200).is_ok());
    }

    #[test]
    fn len() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert_eq!(swap_file.page_states.len(), 200);
    }

    #[test]
    fn page_content_default_is_none() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
    }

    #[test]
    fn page_content_returns_content() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();

        let page = swap_file.page_content(0, false).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
        assert_eq!(result, data);
    }
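    // An editor-added example (not part of the original suite): clear_range() keeps the file page
    // allocated, so mark_as_present() can restore the stale content without any disk I/O.
    #[test]
    fn mark_as_present_reuses_cleared_page() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();
        assert!(swap_file.page_content(0, false).unwrap().is_none());

        swap_file.mark_as_present(0).unwrap();
        assert_page_content(&swap_file, 0, data);
    }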
    #[test]
    fn page_content_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.page_content(199, false).is_ok());
        match swap_file.page_content(200, false) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        }
    }

    fn assert_page_content(swap_file: &SwapFile, idx: usize, data: &[u8]) {
        let page = swap_file.page_content(idx, false).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
        assert_eq!(result, data);
    }

    #[test]
    fn write_to_file_swap_file() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf1 = &vec![1; pagesize()];
        let buf2 = &vec![2; 2 * pagesize()];
        swap_file.write_to_file(0, buf1).unwrap();
        swap_file.write_to_file(2, buf2).unwrap();

        // page_content()
        assert_page_content(&swap_file, 0, buf1);
        assert_page_content(&swap_file, 2, &buf2[0..pagesize()]);
        assert_page_content(&swap_file, 3, &buf2[pagesize()..2 * pagesize()]);
    }

    #[test]
    fn write_to_file_invalid_size() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf = &vec![1; pagesize() + 1];
        match swap_file.write_to_file(0, buf) {
            Err(Error::InvalidSize) => {}
            _ => unreachable!("not invalid size"),
        };
    }

    #[test]
    fn write_to_file_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf1 = &vec![1; pagesize()];
        let buf2 = &vec![2; 2 * pagesize()];
        match swap_file.write_to_file(200, buf1) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        match swap_file.write_to_file(199, buf2) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }
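    // An editor-added example (not part of the original suite): mark_as_present() rejects pages
    // that have no content on the file and pages that are already present.
    #[test]
    fn mark_as_present_invalid_index() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        // No content on the file yet.
        match swap_file.mark_as_present(0) {
            Err(Error::InvalidIndex) => {}
            other => unreachable!("unexpected result {:?}", other),
        }

        // Already present.
        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        match swap_file.mark_as_present(1) {
            Err(Error::InvalidIndex) => {}
            other => unreachable!("unexpected result {:?}", other),
        }
    }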
    #[test]
    fn write_to_file_overwrite() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();
        swap_file
            .write_to_file(2, &vec![2; 2 * pagesize()])
            .unwrap();

        let mut buf = vec![0; 3 * pagesize()];
        buf[..pagesize()].fill(3);
        buf[pagesize()..2 * pagesize()].fill(4);
        buf[2 * pagesize()..3 * pagesize()].fill(5);
        swap_file.write_to_file(0, &buf).unwrap();

        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
        assert_page_content(&swap_file, 2, &vec![5; pagesize()]);
        assert_page_content(&swap_file, 3, &vec![2; pagesize()]);
        assert!(swap_file.page_content(4, false).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..4 * pagesize() as u64]));

        buf[..pagesize()].fill(6);
        buf[pagesize()..2 * pagesize()].fill(7);
        buf[2 * pagesize()..3 * pagesize()].fill(8);
        swap_file.write_to_file(2, &buf).unwrap();
        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
        assert_page_content(&swap_file, 2, &vec![6; pagesize()]);
        assert_page_content(&swap_file, 3, &vec![7; pagesize()]);
        assert_page_content(&swap_file, 4, &vec![8; pagesize()]);
        assert!(swap_file.page_content(5, false).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..5 * pagesize() as u64]));
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn lock_and_start_populate() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file
            .write_to_file(3, &vec![1; 5 * pagesize()])
            .unwrap();
        swap_file.write_to_file(10, &vec![1; pagesize()]).unwrap();

        let mut locked_pages = 0;
        loop {
            let pages = swap_file.lock_and_async_prefetch(2).unwrap();
            if pages == 0 {
                break;
            }
            assert!(pages <= 2);
            locked_pages += pages;
        }
        assert_eq!(locked_pages, 7);
    }

    #[test]
    fn clear_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
    }
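    // An editor-added example (not part of the original suite): a cleared page still owns its
    // file page, so a later write_to_file() overwrites it in place instead of allocating a new
    // one.
    #[test]
    fn clear_range_then_rewrite() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();
        swap_file.clear_range(0..1).unwrap();
        swap_file.write_to_file(0, &vec![2; pagesize()]).unwrap();

        assert_page_content(&swap_file, 0, &vec![2; pagesize()]);
    }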
    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn clear_range_unlocked_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // 1..6 is locked, 6..11 is not locked.
        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);

        // locked pages only
        assert_eq!(swap_file.clear_range(1..4).unwrap(), 3);
        // locked pages + non-locked pages
        assert_eq!(swap_file.clear_range(4..7).unwrap(), 2);
        // non-locked pages
        assert_eq!(swap_file.clear_range(10..11).unwrap(), 0);
    }

    #[test]
    fn clear_range_keep_on_disk() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();

        let slice = swap_file.page_content(0, true).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let slice = unsafe { slice::from_raw_parts(slice.as_ptr(), slice.size()) };
        assert_eq!(slice, data);
    }

    #[test]
    fn clear_range_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();
        swap_file.write_to_file(199, &vec![0; pagesize()]).unwrap();

        match swap_file.clear_range(199..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        assert!(swap_file.clear_range(199..200).is_ok());
        match swap_file.clear_range(200..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }

    #[test]
    fn free_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.free_range(0..1).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
        assert!(swap_file.page_content(0, true).unwrap().is_none());
    }
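    // An editor-added example (not part of the original suite): clear_range() requires every page
    // in the range to be present, so a range spanning empty pages fails with InvalidIndex.
    #[test]
    fn clear_range_invalid_index() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(5, &vec![2; pagesize()]).unwrap();

        match swap_file.clear_range(0..6) {
            Err(Error::InvalidIndex) => {}
            other => unreachable!("unexpected result {:?}", other),
        }
    }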
    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn free_range_unlocked_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // 1..6 is locked, 6..11 is not locked.
        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);

        // empty pages
        assert_eq!(swap_file.free_range(0..1).unwrap(), 0);
        // empty pages + locked pages
        assert_eq!(swap_file.free_range(0..2).unwrap(), 1);
        // locked pages only
        assert_eq!(swap_file.free_range(2..4).unwrap(), 2);
        // empty pages + locked pages + non-locked pages
        assert_eq!(swap_file.free_range(3..7).unwrap(), 2);
        // non-locked pages
        assert_eq!(swap_file.free_range(10..11).unwrap(), 0);
    }

    #[test]
    fn free_range_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.free_range(199..200).is_ok());
        match swap_file.free_range(200..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        match swap_file.free_range(199..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }

    #[test]
    fn free_range_and_write() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; 5 * pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.free_range(0..5).unwrap();

        swap_file
            .write_to_file(0, &vec![2; 2 * pagesize()])
            .unwrap();
        swap_file
            .write_to_file(5, &vec![3; 4 * pagesize()])
            .unwrap();

        assert_page_content(&swap_file, 0, &vec![2; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![2; pagesize()]);
        assert!(swap_file.page_content(2, true).unwrap().is_none());
        assert!(swap_file.page_content(3, true).unwrap().is_none());
        assert!(swap_file.page_content(4, true).unwrap().is_none());
        assert_page_content(&swap_file, 5, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 6, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 7, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 8, &vec![3; pagesize()]);
        assert!(swap_file.page_content(9, true).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..6 * pagesize() as u64]));
    }
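    // An editor-added example (not part of the original suite): get_slice() fails with
    // InvalidIndex when the pages are not all present and consecutive in the compacted file.
    #[test]
    fn get_slice_invalid_index() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();

        // Page 1 has no content on the file.
        match swap_file.get_slice(0..2) {
            Err(Error::InvalidIndex) => {}
            other => unreachable!("unexpected result {:?}", other),
        }
    }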
    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn clear_mlock() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // success if there is no mlock.
        assert!(swap_file.clear_mlock().is_ok());

        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
        // success if there is an mlocked area.
        assert!(swap_file.clear_mlock().is_ok());

        // mlock area is cleared.
        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
    }

    #[test]
    fn first_data_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 2 * pagesize()])
            .unwrap();
        swap_file.write_to_file(3, &vec![2; pagesize()]).unwrap();

        assert_eq!(swap_file.first_data_range(200).unwrap(), 1..4);
        assert_eq!(swap_file.first_data_range(2).unwrap(), 1..3);
        assert_eq!(swap_file.first_data_range(1).unwrap(), 1..2);
        swap_file.clear_range(1..3).unwrap();
        assert_eq!(swap_file.first_data_range(2).unwrap(), 3..4);
        swap_file.clear_range(3..4).unwrap();
        assert!(swap_file.first_data_range(2).is_none());
    }

    #[test]
    fn get_slice() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();

        let slice = swap_file.get_slice(1..3).unwrap();
        assert_eq!(slice.size(), 2 * pagesize());
        let mut buf = vec![0u8; pagesize()];
        slice.get_slice(0, pagesize()).unwrap().copy_to(&mut buf);
        assert_eq!(buf, vec![1; pagesize()]);

        let mut buf = vec![0u8; pagesize()];
        slice
            .get_slice(pagesize(), pagesize())
            .unwrap()
            .copy_to(&mut buf);
        assert_eq!(buf, vec![2; pagesize()]);
    }

    #[test]
    fn get_slice_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        match swap_file.get_slice(200..201) {
            Err(Error::OutOfRange) => {}
            other => {
                unreachable!("unexpected result {:?}", other);
            }
        }
    }

    #[test]
    fn present_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();

        assert_eq!(swap_file.present_pages(), 2);
    }
}