// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! The mmap module provides a safe interface to mmap memory and ensures unmap is called when the
//! mmap object goes out of scope.

use std::ptr::null_mut;

use libc::c_int;
use libc::PROT_READ;
use libc::PROT_WRITE;
use log::warn;

use super::Error as ErrnoError;
use crate::pagesize;
use crate::AsRawDescriptor;
use crate::Descriptor;
use crate::MappedRegion;
use crate::MemoryMapping as CrateMemoryMapping;
use crate::MemoryMappingBuilder;
use crate::MmapError as Error;
use crate::MmapResult as Result;
use crate::Protection;
use crate::RawDescriptor;
use crate::SafeDescriptor;

impl From<Protection> for c_int {
    #[inline(always)]
    fn from(p: Protection) -> Self {
        let mut value = 0;
        if p.read {
            value |= PROT_READ;
        }
        if p.write {
            value |= PROT_WRITE;
        }
        value
    }
}

/// Validates that `offset`..`offset+range_size` lies within the bounds of a memory mapping of
/// `mmap_size` bytes. Also checks for any overflow.
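///
/// For example, with `mmap_size = 0x1000`, an `offset` of 0xF00 and a `range_size` of 0x100 end
/// exactly at the mapping boundary and are accepted, while a `range_size` of 0x101 (or any pair
/// whose sum overflows `usize`) is rejected with `Error::InvalidAddress`.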
fn validate_includes_range(mmap_size: usize, offset: usize, range_size: usize) -> Result<()> {
    // Ensure offset + size doesn't overflow
    let end_offset = offset
        .checked_add(range_size)
        .ok_or(Error::InvalidAddress)?;
    // Ensure offset + size is within the mapping bounds
    if end_offset <= mmap_size {
        Ok(())
    } else {
        Err(Error::InvalidAddress)
    }
}

impl dyn MappedRegion {
    /// Calls msync with MS_SYNC on a mapping of `size` bytes starting at `offset` from the start
    /// of the region. `offset`..`offset+size` must be contained within the `MappedRegion`.
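    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled as a doctest); it assumes this module's types and
    /// `pagesize()` are re-exported at the crate root, as in crosvm's `base` crate:
    ///
    /// ```ignore
    /// let m = MemoryMappingArena::new(pagesize() * 4).unwrap();
    /// // Flush the first page of the region with MS_SYNC.
    /// <dyn MappedRegion>::msync(&m, 0, pagesize()).unwrap();
    /// ```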
    pub fn msync(&self, offset: usize, size: usize) -> Result<()> {
        validate_includes_range(self.size(), offset, size)?;

        // SAFETY:
        // Safe because the MemoryMapping/MemoryMappingArena interface ensures our pointer and size
        // are correct, and we've validated that `offset`..`offset+size` is in the range owned by
        // this `MappedRegion`.
        let ret = unsafe {
            libc::msync(
                (self.as_ptr() as usize + offset) as *mut libc::c_void,
                size,
                libc::MS_SYNC,
            )
        };
        if ret != -1 {
            Ok(())
        } else {
            Err(Error::SystemCallFailed(ErrnoError::last()))
        }
    }
}

/// Wraps an anonymous shared memory mapping in the current process. Provides
/// RAII semantics including munmap when no longer needed.
#[derive(Debug)]
pub struct MemoryMapping {
    addr: *mut u8,
    size: usize,
}

// SAFETY:
// Send and Sync aren't automatically inherited for the raw address pointer.
// Accessing that pointer is only done through the stateless interface which
// allows the object to be shared by multiple threads without a decrease in
// safety.
unsafe impl Send for MemoryMapping {}
// SAFETY: See safety comments for impl Send
unsafe impl Sync for MemoryMapping {}

impl MemoryMapping {
    /// Creates an anonymous shared, read/write mapping of `size` bytes.
    ///
    /// # Arguments
    /// * `size` - Size of memory region in bytes.
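    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled as a doctest); it assumes this module's types and
    /// `pagesize()` are re-exported at the crate root, as in crosvm's `base` crate:
    ///
    /// ```ignore
    /// let mapping = MemoryMapping::new(pagesize()).unwrap();
    /// assert_eq!(mapping.size(), pagesize());
    /// ```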
    pub fn new(size: usize) -> Result<MemoryMapping> {
        MemoryMapping::new_protection(size, None, Protection::read_write())
    }

    /// Creates an anonymous shared mapping of `size` bytes with `prot` protection.
    ///
    /// # Arguments
    /// * `size` - Size of memory region in bytes.
    /// * `align` - Optional alignment for MemoryMapping::addr.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn new_protection(
        size: usize,
        align: Option<u64>,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        // SAFETY:
        // This is safe because we are creating an anonymous mapping in a place not already used by
        // any other area in this process.
        unsafe { MemoryMapping::try_mmap(None, size, align, prot.into(), None) }
    }

    /// Maps the first `size` bytes of the given `fd` as read/write.
    ///
    /// # Arguments
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
    pub fn from_fd(fd: &dyn AsRawDescriptor, size: usize) -> Result<MemoryMapping> {
        MemoryMapping::from_fd_offset(fd, size, 0)
    }

    pub fn from_fd_offset(
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
    ) -> Result<MemoryMapping> {
        MemoryMapping::from_fd_offset_protection(fd, size, offset, Protection::read_write())
    }

    /// Maps the `size` bytes starting at `offset` bytes of the given `fd` as read/write.
    ///
    /// # Arguments
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
    /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn from_fd_offset_protection(
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        MemoryMapping::from_fd_offset_protection_populate(fd, size, offset, 0, prot, false)
    }

    /// Maps `size` bytes starting at `offset` from the given `fd` as read/write, and requests
    /// that the pages are pre-populated.
    ///
    /// # Arguments
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
    /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `align` - Alignment for MemoryMapping::addr.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    /// * `populate` - Populate (prefault) page tables for a mapping.
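    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled as a doctest); it assumes this module's types and
    /// `pagesize()` are re-exported at the crate root, as in crosvm's `base` crate:
    ///
    /// ```ignore
    /// let file = tempfile::tempfile().unwrap();
    /// file.set_len(pagesize() as u64).unwrap();
    /// let mapping = MemoryMapping::from_fd_offset_protection_populate(
    ///     &file,
    ///     pagesize(),
    ///     0,                        // offset into the file
    ///     0,                        // no extra alignment constraint
    ///     Protection::read_write(),
    ///     true,                     // prefault the page tables (MAP_POPULATE)
    /// )
    /// .unwrap();
    /// ```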
    pub fn from_fd_offset_protection_populate(
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
        align: u64,
        prot: Protection,
        populate: bool,
    ) -> Result<MemoryMapping> {
        // SAFETY:
        // This is safe because we are creating the mapping in a place not already used by any
        // other area in this process.
        unsafe {
            MemoryMapping::try_mmap_populate(
                None,
                size,
                Some(align),
                prot.into(),
                Some((fd, offset)),
                populate,
            )
        }
    }

    /// Creates an anonymous shared mapping of `size` bytes with `prot` protection at `addr`.
    ///
    /// # Arguments
    ///
    /// * `addr` - Memory address to mmap at.
    /// * `size` - Size of memory region in bytes.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    ///
    /// # Safety
    ///
    /// This function should not be called before the caller unmaps any mmap'd regions already
    /// present at `(addr..addr+size)`.
    pub unsafe fn new_protection_fixed(
        addr: *mut u8,
        size: usize,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        MemoryMapping::try_mmap(Some(addr), size, None, prot.into(), None)
    }

    /// Maps the `size` bytes starting at `offset` bytes of the given `fd` at `addr` with
    /// `prot` protections.
    ///
    /// # Arguments
    ///
    /// * `addr` - Memory address to mmap at.
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
    /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    ///
    /// # Safety
    ///
    /// This function should not be called before the caller unmaps any mmap'd regions already
    /// present at `(addr..addr+size)`.
    pub unsafe fn from_descriptor_offset_protection_fixed(
        addr: *mut u8,
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        MemoryMapping::try_mmap(Some(addr), size, None, prot.into(), Some((fd, offset)))
    }

    /// Helper wrapper around `try_mmap_populate` that does not request MAP_POPULATE.
    unsafe fn try_mmap(
        addr: Option<*mut u8>,
        size: usize,
        align: Option<u64>,
        prot: c_int,
        fd: Option<(&dyn AsRawDescriptor, u64)>,
    ) -> Result<MemoryMapping> {
        MemoryMapping::try_mmap_populate(addr, size, align, prot, fd, false)
    }

    /// Helper wrapper around libc::mmap that does some basic validation and calls
    /// madvise with MADV_DONTDUMP on the created mmap.
    unsafe fn try_mmap_populate(
        addr: Option<*mut u8>,
        size: usize,
        align: Option<u64>,
        prot: c_int,
        fd: Option<(&dyn AsRawDescriptor, u64)>,
        populate: bool,
    ) -> Result<MemoryMapping> {
        let mut flags = libc::MAP_SHARED;
        if populate {
            flags |= libc::MAP_POPULATE;
        }
        // If addr is provided, set the MAP_FIXED and MAP_NORESERVE flags and validate that addr
        // is page aligned.
        let addr = match addr {
            Some(addr) => {
                if (addr as usize) % pagesize() != 0 {
                    return Err(Error::NotPageAligned);
                }
                flags |= libc::MAP_FIXED | libc::MAP_NORESERVE;
                addr as *mut libc::c_void
            }
            None => null_mut(),
        };

        // mmap already page-aligns the returned address, so a page-sized alignment request is a
        // no-op.
        let align = if align.unwrap_or(0) == pagesize() as u64 {
            Some(0)
        } else {
            align
        };

        // Reserve an aligned address if an alignment larger than the page size was requested.
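        // The approach below over-allocates an anonymous placeholder region of `size + align`
        // bytes, rounds its starting address up to the next multiple of `align`, maps the real
        // region there with MAP_FIXED, and finally unmaps the unused head and tail of the
        // placeholder. For example, with size = 0x3000 and align = 0x4000, the 0x7000-byte
        // placeholder is guaranteed to contain a 0x4000-aligned address that still has 0x3000
        // bytes of room after it.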
        let (addr, orig_addr, orig_size) = match align {
            None | Some(0) => (addr, None, None),
            Some(align) => {
                if !addr.is_null() || !align.is_power_of_two() {
                    return Err(Error::InvalidAlignment);
                }
                let orig_size = size + align as usize;
                let orig_addr = libc::mmap64(
                    null_mut(),
                    orig_size,
                    prot,
                    libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS,
                    -1,
                    0,
                );
                if orig_addr == libc::MAP_FAILED {
                    return Err(Error::SystemCallFailed(ErrnoError::last()));
                }

                flags |= libc::MAP_FIXED;

                let mask = align - 1;
                (
                    (orig_addr.wrapping_add(mask as usize) as u64 & !mask) as *mut libc::c_void,
                    Some(orig_addr),
                    Some(orig_size),
                )
            }
        };

        // If fd is provided, validate that the fd offset is within bounds. Otherwise this is an
        // anonymous mapping, so set the MAP_ANONYMOUS and MAP_NORESERVE flags.
        let (fd, offset) = match fd {
            Some((fd, offset)) => {
                if offset > libc::off64_t::max_value() as u64 {
                    return Err(Error::InvalidOffset);
                }
                // Map MAP_PRIVATE when the descriptor is write-sealed (read-only). See the
                // upstream discussion about relaxing this restriction:
                // - https://lore.kernel.org/bpf/20231013103208.kdffpyerufr4ygnw@quack3/T/
                // SAFETY:
                // Safe because no third parameter is expected and we check the return result.
                let seals = unsafe { libc::fcntl(fd.as_raw_descriptor(), libc::F_GET_SEALS) };
                if (seals >= 0) && (seals & libc::F_SEAL_WRITE != 0) {
                    flags &= !libc::MAP_SHARED;
                    flags |= libc::MAP_PRIVATE;
                }
                (fd.as_raw_descriptor(), offset as libc::off64_t)
            }
            None => {
                flags |= libc::MAP_ANONYMOUS | libc::MAP_NORESERVE;
                (-1, 0)
            }
        };
        let addr = libc::mmap64(addr, size, prot, flags, fd, offset);
        if addr == libc::MAP_FAILED {
            return Err(Error::SystemCallFailed(ErrnoError::last()));
        }

        // If an oversized placeholder mapping exists, unmap its unused head and tail.
        if let Some(orig_addr) = orig_addr {
            let mut unmap_start = orig_addr as usize;
            let mut unmap_end = addr as usize;
            let mut unmap_size = unmap_end - unmap_start;

            if unmap_size > 0 {
                libc::munmap(orig_addr, unmap_size);
            }

            unmap_start = addr as usize + size;
            unmap_end = orig_addr as usize + orig_size.unwrap();
            unmap_size = unmap_end - unmap_start;

            if unmap_size > 0 {
                libc::munmap(unmap_start as *mut libc::c_void, unmap_size);
            }
        }

        // This is safe because we call madvise with a valid address and size.
        let _ = libc::madvise(addr, size, libc::MADV_DONTDUMP);

        // This is safe because KSM's only userspace visible effects are timing
        // and memory consumption; it doesn't affect rust safety semantics.
        // KSM is also disabled by default, and this flag is only a hint.
        let _ = libc::madvise(addr, size, libc::MADV_MERGEABLE);

        Ok(MemoryMapping {
            addr: addr as *mut u8,
            size,
        })
    }

    /// Madvise the kernel to unmap on fork.
    pub fn use_dontfork(&self) -> Result<()> {
        // SAFETY:
        // This is safe because we call madvise with a valid address and size, and we check the
        // return value.
        let ret = unsafe {
            libc::madvise(
                self.as_ptr() as *mut libc::c_void,
                self.size(),
                libc::MADV_DONTFORK,
            )
        };
        if ret == -1 {
            Err(Error::SystemCallFailed(ErrnoError::last()))
        } else {
            Ok(())
        }
    }

    /// Madvise the kernel to use Huge Pages for this mapping.
    pub fn use_hugepages(&self) -> Result<()> {
        const SZ_2M: usize = 2 * 1024 * 1024;

        // THP uses 2M pages, so use THP only on mappings that are at least
        // 2M in size.
        if self.size() < SZ_2M {
            return Ok(());
        }

        // SAFETY:
        // This is safe because we call madvise with a valid address and size, and we check the
        // return value.
        let ret = unsafe {
            libc::madvise(
                self.as_ptr() as *mut libc::c_void,
                self.size(),
                libc::MADV_HUGEPAGE,
            )
        };
        if ret == -1 {
            Err(Error::SystemCallFailed(ErrnoError::last()))
        } else {
            Ok(())
        }
    }

    /// Calls msync with MS_SYNC on the mapping.
    pub fn msync(&self) -> Result<()> {
        // SAFETY:
        // This is safe since we use the exact address and length of a known
        // good memory mapping.
        let ret = unsafe {
            libc::msync(
                self.as_ptr() as *mut libc::c_void,
                self.size(),
                libc::MS_SYNC,
            )
        };
        if ret == -1 {
            return Err(Error::SystemCallFailed(ErrnoError::last()));
        }
        Ok(())
    }

    /// Uses madvise to tell the kernel to remove the specified range. Subsequent reads
    /// of the pages in the range will return zero bytes.
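    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled as a doctest); it assumes this module's types and
    /// `pagesize()` are re-exported at the crate root, as in crosvm's `base` crate:
    ///
    /// ```ignore
    /// let m = MemoryMapping::new(pagesize()).unwrap();
    /// // Discard the whole page; later reads of the range observe zeroes.
    /// m.remove_range(0, pagesize()).unwrap();
    /// ```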
    pub fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY: Safe because all the args to madvise are valid and the return
        // value is checked.
        let ret = unsafe {
            // madvising away the region is the same as the guest changing it.
            // Next time it is read, it may return zero pages.
            libc::madvise(
                (self.addr as usize + mem_offset) as *mut _,
                count,
                libc::MADV_REMOVE,
            )
        };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    /// Tell the kernel to readahead the range.
    ///
    /// This does not block the thread waiting for I/O on the backing file. It also does not
    /// guarantee that the pages stay resident unless they are mlock(2)ed via
    /// `lock_on_fault_unchecked()`.
    ///
    /// The `mem_offset` and `count` must be validated by caller.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
    pub fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY:
        // Safe because populating the pages from the backing file does not affect Rust memory
        // safety.
        let ret = unsafe {
            libc::madvise(
                (self.addr as usize + mem_offset) as *mut _,
                count,
                libc::MADV_WILLNEED,
            )
        };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    /// Tell the kernel to drop the page cache.
    ///
    /// This cannot be applied to locked pages.
    ///
    /// The `mem_offset` and `count` must be validated by caller.
    ///
    /// NOTE: This function has destructive semantics. It throws away data in the page cache
    /// without writing it to the backing file. If the data is important, the caller should
    /// ensure it is written to disk before calling this function or should use MADV_PAGEOUT
    /// instead.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
    pub fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY:
        // Safe because dropping the page cache does not affect Rust memory safety.
        let ret = unsafe {
            libc::madvise(
                (self.addr as usize + mem_offset) as *mut _,
                count,
                libc::MADV_DONTNEED,
            )
        };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    /// Lock the resident pages in the range so they are not swapped out.
    ///
    /// The remaining nonresident pages are locked when they are populated.
    ///
    /// The `mem_offset` and `count` must be validated by caller.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
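    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled as a doctest); it assumes this module's types and
    /// `pagesize()` are re-exported at the crate root, as in crosvm's `base` crate:
    ///
    /// ```ignore
    /// let m = MemoryMapping::new(pagesize()).unwrap();
    /// // Lock resident (and subsequently faulted-in) pages of the range, then release them.
    /// m.lock_on_fault(0, pagesize()).unwrap();
    /// m.unlock(0, pagesize()).unwrap();
    /// ```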
    pub fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        let addr = self.addr as usize + mem_offset;
        // SAFETY:
        // Safe because MLOCK_ONFAULT only affects the swap behavior of the kernel, so it has no
        // impact on rust semantics.
        // let ret = unsafe { libc::mlock2(addr as *mut _, count, libc::MLOCK_ONFAULT) };
        // ANDROID(b/274805769): android glibc doesn't have mlock2, so we need to make the syscall
        // directly.
        let ret = unsafe {
            libc::syscall(
                libc::SYS_mlock2,
                addr as *mut libc::c_void,
                count,
                libc::MLOCK_ONFAULT,
            )
        };
        if ret < 0 {
            let errno = super::Error::last();
            warn!(
                "failed to mlock at {:#x} with length {}: {}",
                addr as u64, count, errno,
            );
            Err(Error::SystemCallFailed(errno))
        } else {
            Ok(())
        }
    }

    /// Unlock the range of pages.
    ///
    /// Unlocking non-locked pages does not fail.
    ///
    /// The `mem_offset` and `count` must be validated by caller.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
    pub fn unlock(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY:
        // Safe because munlock(2) does not affect the Rust memory safety.
        let ret = unsafe { libc::munlock((self.addr as usize + mem_offset) as *mut _, count) };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    // Check that offset+count is valid and return the sum.
    pub(crate) fn range_end(&self, offset: usize, count: usize) -> Result<usize> {
        let mem_end = offset.checked_add(count).ok_or(Error::InvalidAddress)?;
        if mem_end > self.size() {
            return Err(Error::InvalidAddress);
        }
        Ok(mem_end)
    }
}

// SAFETY:
// Safe because the pointer and size point to a memory range owned by this MemoryMapping that won't
// be unmapped until it's Dropped.
unsafe impl MappedRegion for MemoryMapping {
    fn as_ptr(&self) -> *mut u8 {
        self.addr
    }

    fn size(&self) -> usize {
        self.size
    }
}

impl Drop for MemoryMapping {
    fn drop(&mut self) {
        // SAFETY:
        // This is safe because we mmap the area at addr ourselves, and nobody
        // else is holding a reference to it.
        unsafe {
            libc::munmap(self.addr as *mut libc::c_void, self.size);
        }
    }
}

/// Tracks fixed memory maps within an anonymous memory-mapped fixed-size arena
/// in the current process.
pub struct MemoryMappingArena {
    addr: *mut u8,
    size: usize,
}

// SAFETY:
// Send and Sync aren't automatically inherited for the raw address pointer.
// Accessing that pointer is only done through the stateless interface which
// allows the object to be shared by multiple threads without a decrease in
// safety.
unsafe impl Send for MemoryMappingArena {}
// SAFETY: See safety comments for impl Send
unsafe impl Sync for MemoryMappingArena {}

impl MemoryMappingArena {
    /// Creates an mmap arena of `size` bytes.
    ///
    /// # Arguments
    /// * `size` - Size of memory region in bytes.
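    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled as a doctest); it assumes this module's types and
    /// `pagesize()` are re-exported at the crate root, as in crosvm's `base` crate:
    ///
    /// ```ignore
    /// let mut arena = MemoryMappingArena::new(4 * pagesize()).unwrap();
    /// // Map two pages read/write at the start of the arena, then drop the first page again.
    /// arena.add_anon(0, 2 * pagesize()).unwrap();
    /// arena.remove(0, pagesize()).unwrap();
    /// ```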
    pub fn new(size: usize) -> Result<MemoryMappingArena> {
        // Reserve the arena's memory using an anonymous read-only mmap.
        MemoryMapping::new_protection(size, None, Protection::read()).map(From::from)
    }

    /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena
    /// with `prot` protections. `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn add_anon_protection(
        &mut self,
        offset: usize,
        size: usize,
        prot: Protection,
    ) -> Result<()> {
        self.try_add(offset, size, prot, None)
    }

    /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena.
    /// `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    pub fn add_anon(&mut self, offset: usize, size: usize) -> Result<()> {
        self.add_anon_protection(offset, size, Protection::read_write())
    }

    /// Maps `size` bytes from the start of the given `fd` at `offset` bytes from
    /// the start of the arena. `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `fd` - File descriptor to mmap from.
    pub fn add_fd(&mut self, offset: usize, size: usize, fd: &dyn AsRawDescriptor) -> Result<()> {
        self.add_fd_offset(offset, size, fd, 0)
    }

    /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
    /// at `offset` bytes from the start of the arena. `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `fd` - File descriptor to mmap from.
    /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    pub fn add_fd_offset(
        &mut self,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
    ) -> Result<()> {
        self.add_fd_offset_protection(offset, size, fd, fd_offset, Protection::read_write())
    }

    /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
    /// at `offset` bytes from the start of the arena with `prot` protections.
    /// `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `fd` - File descriptor to mmap from.
    /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn add_fd_offset_protection(
        &mut self,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        self.try_add(offset, size, prot, Some((fd, fd_offset)))
    }

    /// Helper method that calls the appropriate MemoryMapping constructor and adds
    /// the resulting map into the arena.
    fn try_add(
        &mut self,
        offset: usize,
        size: usize,
        prot: Protection,
        fd: Option<(&dyn AsRawDescriptor, u64)>,
    ) -> Result<()> {
        // Ensure offset is page-aligned
        if offset % pagesize() != 0 {
            return Err(Error::NotPageAligned);
        }
        validate_includes_range(self.size(), offset, size)?;

        // SAFETY:
        // This is safe since the range has been validated.
        let mmap = unsafe {
            match fd {
                Some((fd, fd_offset)) => MemoryMapping::from_descriptor_offset_protection_fixed(
                    self.addr.add(offset),
                    fd,
                    size,
                    fd_offset,
                    prot,
                )?,
                None => MemoryMapping::new_protection_fixed(self.addr.add(offset), size, prot)?,
            }
        };

        // This mapping will get automatically removed when we drop the whole arena.
        std::mem::forget(mmap);
        Ok(())
    }

    /// Removes `size` bytes at `offset` bytes from the start of the arena. `offset` must be page
    /// aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    pub fn remove(&mut self, offset: usize, size: usize) -> Result<()> {
        self.try_add(offset, size, Protection::read(), None)
    }
}

// SAFETY:
// Safe because the pointer and size point to a memory range owned by this MemoryMappingArena that
// won't be unmapped until it's Dropped.
unsafe impl MappedRegion for MemoryMappingArena {
    fn as_ptr(&self) -> *mut u8 {
        self.addr
    }

    fn size(&self) -> usize {
        self.size
    }

    fn add_fd_mapping(
        &mut self,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        self.add_fd_offset_protection(offset, size, fd, fd_offset, prot)
    }

    fn remove_mapping(&mut self, offset: usize, size: usize) -> Result<()> {
        self.remove(offset, size)
    }
}

impl From<MemoryMapping> for MemoryMappingArena {
    fn from(mmap: MemoryMapping) -> Self {
        let addr = mmap.as_ptr();
        let size = mmap.size();

        // Forget the original mapping because the `MemoryMappingArena` will take care of calling
        // `munmap` when it is dropped.
        std::mem::forget(mmap);
        MemoryMappingArena { addr, size }
    }
}

impl From<CrateMemoryMapping> for MemoryMappingArena {
    fn from(mmap: CrateMemoryMapping) -> Self {
        MemoryMappingArena::from(mmap.mapping)
    }
}

impl Drop for MemoryMappingArena {
    fn drop(&mut self) {
        // SAFETY:
        // This is safe because we own this memory range, and nobody else is holding a reference to
        // it.
        unsafe {
            libc::munmap(self.addr as *mut libc::c_void, self.size);
        }
    }
}

impl CrateMemoryMapping {
    pub fn use_dontfork(&self) -> Result<()> {
        self.mapping.use_dontfork()
    }

    pub fn use_hugepages(&self) -> Result<()> {
        self.mapping.use_hugepages()
    }

    pub fn from_raw_ptr(addr: RawDescriptor, size: usize) -> Result<CrateMemoryMapping> {
        MemoryMapping::from_fd_offset(&Descriptor(addr), size, 0).map(|mapping| {
            CrateMemoryMapping {
                mapping,
                _file_descriptor: None,
            }
        })
    }
}

pub trait MemoryMappingUnix {
    /// Remove the specified range from the mapping.
    fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Tell the kernel to readahead the range.
    fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Tell the kernel to drop the page cache.
    fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Lock the resident pages in the range so they are not swapped out.
    fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Unlock the range of pages.
    fn unlock(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Disable host swap for this mapping.
    fn lock_all(&self) -> Result<()>;
}

impl MemoryMappingUnix for CrateMemoryMapping {
    fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.remove_range(mem_offset, count)
    }
    fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.async_prefetch(mem_offset, count)
    }
    fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.drop_page_cache(mem_offset, count)
    }
    fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.lock_on_fault(mem_offset, count)
    }
    fn unlock(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.unlock(mem_offset, count)
    }
    fn lock_all(&self) -> Result<()> {
        self.mapping.lock_on_fault(0, self.mapping.size())
    }
}

pub trait MemoryMappingBuilderUnix<'a> {
    #[allow(clippy::wrong_self_convention)]
    fn from_descriptor(self, descriptor: &'a dyn AsRawDescriptor) -> MemoryMappingBuilder;
}

impl<'a> MemoryMappingBuilderUnix<'a> for MemoryMappingBuilder<'a> {
    /// Build the memory mapping from the specified `descriptor` to mapped memory.
    ///
    /// Default: Create a new memory mapping.
    #[allow(clippy::wrong_self_convention)]
    fn from_descriptor(mut self, descriptor: &'a dyn AsRawDescriptor) -> MemoryMappingBuilder {
        self.descriptor = Some(descriptor);
        self
    }
}

impl<'a> MemoryMappingBuilder<'a> {
    /// Request that the mapped pages are pre-populated.
    ///
    /// Default: Do not populate
    pub fn populate(mut self) -> MemoryMappingBuilder<'a> {
        self.populate = true;
        self
    }

    /// Build a MemoryMapping from the provided options.
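    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled as a doctest); it assumes this module's types and
    /// `pagesize()` are re-exported at the crate root, as in crosvm's `base` crate, and that
    /// `MemoryMappingBuilderUnix` is in scope for `from_descriptor`:
    ///
    /// ```ignore
    /// // Anonymous mapping:
    /// let anon = MemoryMappingBuilder::new(pagesize()).build().unwrap();
    ///
    /// // File-backed mapping via the Unix-specific builder extension:
    /// let file = tempfile::tempfile().unwrap();
    /// file.set_len(pagesize() as u64).unwrap();
    /// let mapped_file = MemoryMappingBuilder::new(pagesize())
    ///     .from_descriptor(&file)
    ///     .build()
    ///     .unwrap();
    /// ```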
    pub fn build(self) -> Result<CrateMemoryMapping> {
        match self.descriptor {
            None => {
                if self.populate {
                    // Population not supported for new mmaps
                    return Err(Error::InvalidArgument);
                }
                MemoryMappingBuilder::wrap(
                    MemoryMapping::new_protection(
                        self.size,
                        self.align,
                        self.protection.unwrap_or_else(Protection::read_write),
                    )?,
                    None,
                )
            }
            Some(descriptor) => MemoryMappingBuilder::wrap(
                MemoryMapping::from_fd_offset_protection_populate(
                    descriptor,
                    self.size,
                    self.offset.unwrap_or(0),
                    self.align.unwrap_or(0),
                    self.protection.unwrap_or_else(Protection::read_write),
                    self.populate,
                )?,
                None,
            ),
        }
    }

    pub(crate) fn wrap(
        mapping: MemoryMapping,
        file_descriptor: Option<&'a dyn AsRawDescriptor>,
    ) -> Result<CrateMemoryMapping> {
        let file_descriptor = match file_descriptor {
            Some(descriptor) => Some(
                SafeDescriptor::try_from(descriptor)
                    .map_err(|_| Error::SystemCallFailed(ErrnoError::last()))?,
            ),
            None => None,
        };
        Ok(CrateMemoryMapping {
            mapping,
            _file_descriptor: file_descriptor,
        })
    }
}

#[cfg(test)]
mod tests {
    use tempfile::tempfile;

    use super::*;
    use crate::descriptor::Descriptor;
    use crate::VolatileMemory;
    use crate::VolatileMemoryError;

    #[test]
    fn basic_map() {
        let m = MemoryMappingBuilder::new(1024).build().unwrap();
        assert_eq!(1024, m.size());
    }

    #[test]
    fn map_invalid_size() {
        let res = MemoryMappingBuilder::new(0).build().unwrap_err();
        if let Error::SystemCallFailed(e) = res {
            assert_eq!(e.errno(), libc::EINVAL);
        } else {
            panic!("unexpected error: {}", res);
        }
    }

    #[test]
    fn map_invalid_fd() {
        let fd = Descriptor(-1);
        let res = MemoryMapping::from_fd(&fd, 1024).unwrap_err();
        if let Error::SystemCallFailed(e) = res {
            assert_eq!(e.errno(), libc::EBADF);
        } else {
            panic!("unexpected error: {}", res);
        }
    }

    #[test]
    fn test_write_past_end() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let res = m.write_slice(&[1, 2, 3, 4, 5, 6], 0);
        assert!(res.is_ok());
        assert_eq!(res.unwrap(), 5);
    }

    #[test]
    fn slice_size() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let s = m.get_slice(2, 3).unwrap();
        assert_eq!(s.size(), 3);
    }

    #[test]
    fn slice_addr() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let s = m.get_slice(2, 3).unwrap();
        // SAFETY: all addresses are known to exist.
        assert_eq!(s.as_ptr(), unsafe { m.as_ptr().offset(2) });
    }

    #[test]
    fn slice_overflow_error() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let res = m.get_slice(std::usize::MAX, 3).unwrap_err();
        assert_eq!(
            res,
            VolatileMemoryError::Overflow {
                base: std::usize::MAX,
                offset: 3,
            }
        );
    }

    #[test]
    fn slice_oob_error() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let res = m.get_slice(3, 3).unwrap_err();
        assert_eq!(res, VolatileMemoryError::OutOfBounds { addr: 6 });
    }

    #[test]
    fn from_fd_offset_invalid() {
        let fd = tempfile().unwrap();
        let res = MemoryMapping::from_fd_offset(&fd, 4096, (libc::off64_t::max_value() as u64) + 1)
            .unwrap_err();
        match res {
            Error::InvalidOffset => {}
            e => panic!("unexpected error: {}", e),
        }
    }

    #[test]
    fn arena_new() {
        let m = MemoryMappingArena::new(0x40000).unwrap();
        assert_eq!(m.size(), 0x40000);
    }

    #[test]
    fn arena_add() {
        let mut m = MemoryMappingArena::new(0x40000).unwrap();
        assert!(m.add_anon(0, pagesize() * 4).is_ok());
    }

    #[test]
    fn arena_remove() {
        let mut m = MemoryMappingArena::new(0x40000).unwrap();
        assert!(m.add_anon(0, pagesize() * 4).is_ok());
        assert!(m.remove(0, pagesize()).is_ok());
        assert!(m.remove(0, pagesize() * 2).is_ok());
    }

    #[test]
    fn arena_add_alignment_error() {
        let mut m = MemoryMappingArena::new(pagesize() * 2).unwrap();
        assert!(m.add_anon(0, 0x100).is_ok());
        let res = m.add_anon(pagesize() + 1, 0x100).unwrap_err();
        match res {
            Error::NotPageAligned => {}
            e => panic!("unexpected error: {}", e),
        }
    }

    #[test]
    fn arena_add_oob_error() {
        let mut m = MemoryMappingArena::new(pagesize()).unwrap();
        let res = m.add_anon(0, pagesize() + 1).unwrap_err();
        match res {
            Error::InvalidAddress => {}
            e => panic!("unexpected error: {}", e),
        }
    }

    #[test]
    fn arena_add_overlapping() {
        let ps = pagesize();
        let mut m =
            MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");
        m.add_anon(ps * 4, ps * 4)
            .expect("failed to add sub-mapping");

        // Overlap in the front.
        m.add_anon(ps * 2, ps * 3)
            .expect("failed to add front overlapping sub-mapping");

        // Overlap in the back.
        m.add_anon(ps * 7, ps * 3)
            .expect("failed to add back overlapping sub-mapping");

        // Overlap the back of the first mapping, all of the middle mapping, and the front of the
        // last mapping.
        m.add_anon(ps * 3, ps * 6)
            .expect("failed to add mapping that overlaps several mappings");
    }

    #[test]
    fn arena_remove_overlapping() {
        let ps = pagesize();
        let mut m =
            MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");
        m.add_anon(ps * 4, ps * 4)
            .expect("failed to add sub-mapping");
        m.add_anon(ps * 2, ps * 2)
            .expect("failed to add front overlapping sub-mapping");
        m.add_anon(ps * 8, ps * 2)
            .expect("failed to add back overlapping sub-mapping");

        // Remove the back of the first mapping and the front of the second.
        m.remove(ps * 3, ps * 2)
            .expect("failed to remove front overlapping mapping");

        // Remove the back of the second mapping and the front of the third.
        m.remove(ps * 7, ps * 2)
            .expect("failed to remove back overlapping mapping");

        // Remove a mapping that completely overlaps the middle mapping.
        m.remove(ps * 5, ps * 2)
            .expect("failed to remove fully overlapping mapping");
    }

    #[test]
    fn arena_remove_unaligned() {
        let ps = pagesize();
        let mut m =
            MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");

        m.add_anon(0, ps).expect("failed to add mapping");
        m.remove(0, ps - 1)
            .expect("failed to remove unaligned mapping");
    }

    #[test]
    fn arena_msync() {
        let size = 0x40000;
        let m = MemoryMappingArena::new(size).unwrap();
        let ps = pagesize();
        <dyn MappedRegion>::msync(&m, 0, ps).unwrap();
        <dyn MappedRegion>::msync(&m, 0, size).unwrap();
        <dyn MappedRegion>::msync(&m, ps, size - ps).unwrap();
        let res = <dyn MappedRegion>::msync(&m, ps, size).unwrap_err();
        match res {
            Error::InvalidAddress => {}
            e => panic!("unexpected error: {}", e),
        }
    }
}