1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! The mmap module provides a safe interface to mmap memory and ensures unmap is called when the
6 //! mmap object leaves scope.
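//!
//! # Example
//!
//! A minimal illustrative sketch (not part of the original source), assuming the items defined
//! in this module are in scope:
//!
//! ```ignore
//! let mapping = MemoryMapping::new(pagesize()).expect("failed to mmap");
//! assert_eq!(mapping.size(), pagesize());
//! // The mapping is unmapped automatically when `mapping` goes out of scope.
//! ```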
7 
8 use std::ptr::null_mut;
9 
10 use libc::c_int;
11 use libc::PROT_READ;
12 use libc::PROT_WRITE;
13 use log::warn;
14 
15 use super::Error as ErrnoError;
16 use crate::pagesize;
17 use crate::AsRawDescriptor;
18 use crate::Descriptor;
19 use crate::MappedRegion;
20 use crate::MemoryMapping as CrateMemoryMapping;
21 use crate::MemoryMappingBuilder;
22 use crate::MmapError as Error;
23 use crate::MmapResult as Result;
24 use crate::Protection;
25 use crate::RawDescriptor;
26 use crate::SafeDescriptor;
27 
28 impl From<Protection> for c_int {
29     #[inline(always)]
30     fn from(p: Protection) -> Self {
31         let mut value = 0;
32         if p.read {
33             value |= PROT_READ
34         }
35         if p.write {
36             value |= PROT_WRITE;
37         }
38         value
39     }
40 }
41 
42 /// Validates that `offset`..`offset+range_size` lies within the bounds of a memory mapping of
43 /// `mmap_size` bytes.  Also checks for any overflow.
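/// For example, with `mmap_size = 0x1000`, `offset = 0xf00`, and `range_size = 0x100`, the range
/// ends exactly at the mapping boundary and is accepted, while `range_size = 0x101` would overrun
/// the mapping and is rejected with `Error::InvalidAddress`.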
44 fn validate_includes_range(mmap_size: usize, offset: usize, range_size: usize) -> Result<()> {
45     // Ensure offset + size doesn't overflow
46     let end_offset = offset
47         .checked_add(range_size)
48         .ok_or(Error::InvalidAddress)?;
49     // Ensure offset + size are within the mapping bounds
50     if end_offset <= mmap_size {
51         Ok(())
52     } else {
53         Err(Error::InvalidAddress)
54     }
55 }
56 
57 impl dyn MappedRegion {
58     /// Calls msync with MS_SYNC on a mapping of `size` bytes starting at `offset` from the start of
59     /// the region.  `offset`..`offset+size` must be contained within the `MappedRegion`.
60     pub fn msync(&self, offset: usize, size: usize) -> Result<()> {
61         validate_includes_range(self.size(), offset, size)?;
62 
63         // SAFETY:
64         // Safe because the MemoryMapping/MemoryMappingArena interface ensures our pointer and size
65         // are correct, and we've validated that `offset`..`offset+size` is in the range owned by
66         // this `MappedRegion`.
67         let ret = unsafe {
68             libc::msync(
69                 (self.as_ptr() as usize + offset) as *mut libc::c_void,
70                 size,
71                 libc::MS_SYNC,
72             )
73         };
74         if ret != -1 {
75             Ok(())
76         } else {
77             Err(Error::SystemCallFailed(ErrnoError::last()))
78         }
79     }
80 }
81 
82 /// Wraps an anonymous shared memory mapping in the current process. Provides
83 /// RAII semantics including munmap when no longer needed.
84 #[derive(Debug)]
85 pub struct MemoryMapping {
86     addr: *mut u8,
87     size: usize,
88 }
89 
90 // SAFETY:
91 // Send and Sync aren't automatically inherited for the raw address pointer.
92 // Accessing that pointer is only done through the stateless interface which
93 // allows the object to be shared by multiple threads without a decrease in
94 // safety.
95 unsafe impl Send for MemoryMapping {}
96 // SAFETY: See safety comments for impl Send
97 unsafe impl Sync for MemoryMapping {}
98 
99 impl MemoryMapping {
100     /// Creates an anonymous shared, read/write mapping of `size` bytes.
101     ///
102     /// # Arguments
103     /// * `size` - Size of memory region in bytes.
104     pub fn new(size: usize) -> Result<MemoryMapping> {
105         MemoryMapping::new_protection(size, None, Protection::read_write())
106     }
107 
108     /// Creates an anonymous shared mapping of `size` bytes with `prot` protection.
109     ///
110     /// # Arguments
111     /// * `size` - Size of memory region in bytes.
112     /// * `align` - Optional alignment for MemoryMapping::addr.
113     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
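    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not from the original source); the 2 MiB alignment is an
    /// arbitrary choice and must be a power of two:
    ///
    /// ```ignore
    /// let mapping =
    ///     MemoryMapping::new_protection(pagesize(), Some(2 * 1024 * 1024), Protection::read())
    ///         .expect("failed to mmap");
    /// assert_eq!(mapping.size(), pagesize());
    /// ```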
114     pub fn new_protection(
115         size: usize,
116         align: Option<u64>,
117         prot: Protection,
118     ) -> Result<MemoryMapping> {
119         // SAFETY:
120         // This is safe because we are creating an anonymous mapping in a place not already used by
121         // any other area in this process.
122         unsafe { MemoryMapping::try_mmap(None, size, align, prot.into(), None) }
123     }
124 
125     /// Maps the first `size` bytes of the given `fd` as read/write.
126     ///
127     /// # Arguments
128     /// * `fd` - File descriptor to mmap from.
129     /// * `size` - Size of memory region in bytes.
130     pub fn from_fd(fd: &dyn AsRawDescriptor, size: usize) -> Result<MemoryMapping> {
131         MemoryMapping::from_fd_offset(fd, size, 0)
132     }
133 
134     pub fn from_fd_offset(
135         fd: &dyn AsRawDescriptor,
136         size: usize,
137         offset: u64,
138     ) -> Result<MemoryMapping> {
139         MemoryMapping::from_fd_offset_protection(fd, size, offset, Protection::read_write())
140     }
141 
142     /// Maps the `size` bytes starting at `offset` bytes of the given `fd` with `prot` protections.
143     ///
144     /// # Arguments
145     /// * `fd` - File descriptor to mmap from.
146     /// * `size` - Size of memory region in bytes.
147     /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
148     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
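    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not from the original source), assuming the backing
    /// descriptor type implements `AsRawDescriptor`:
    ///
    /// ```ignore
    /// let file = std::fs::File::open("/dev/zero").expect("failed to open");
    /// let mapping =
    ///     MemoryMapping::from_fd_offset_protection(&file, pagesize(), 0, Protection::read())
    ///         .expect("failed to mmap");
    /// assert_eq!(mapping.size(), pagesize());
    /// ```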
149     pub fn from_fd_offset_protection(
150         fd: &dyn AsRawDescriptor,
151         size: usize,
152         offset: u64,
153         prot: Protection,
154     ) -> Result<MemoryMapping> {
155         MemoryMapping::from_fd_offset_protection_populate(fd, size, offset, 0, prot, false)
156     }
157 
158     /// Maps `size` bytes starting at `offset` from the given `fd` with `prot` protections, and
159     /// optionally requests that the pages be pre-populated (prefaulted).
160     /// # Arguments
161     /// * `fd` - File descriptor to mmap from.
162     /// * `size` - Size of memory region in bytes.
163     /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
164     /// * `align` - Alignment for MemoryMapping::addr.
165     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
166     /// * `populate` - Populate (prefault) page tables for a mapping.
167     pub fn from_fd_offset_protection_populate(
168         fd: &dyn AsRawDescriptor,
169         size: usize,
170         offset: u64,
171         align: u64,
172         prot: Protection,
173         populate: bool,
174     ) -> Result<MemoryMapping> {
175         // SAFETY:
176         // This is safe because we are creating an anonymous mapping in a place not already used
177         // by any other area in this process.
178         unsafe {
179             MemoryMapping::try_mmap_populate(
180                 None,
181                 size,
182                 Some(align),
183                 prot.into(),
184                 Some((fd, offset)),
185                 populate,
186             )
187         }
188     }
189 
190     /// Creates an anonymous shared mapping of `size` bytes with `prot` protection at `addr`.
191     ///
192     /// # Arguments
193     ///
194     /// * `addr` - Memory address to mmap at.
195     /// * `size` - Size of memory region in bytes.
196     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
197     ///
198     /// # Safety
199     ///
200     /// This function should not be called before the caller unmaps any mmap'd regions already
201     /// present at `(addr..addr+size)`.
202     pub unsafe fn new_protection_fixed(
203         addr: *mut u8,
204         size: usize,
205         prot: Protection,
206     ) -> Result<MemoryMapping> {
207         MemoryMapping::try_mmap(Some(addr), size, None, prot.into(), None)
208     }
209 
210     /// Maps the `size` bytes starting at `offset` bytes of the given `fd` with
211     /// `prot` protections.
212     ///
213     /// # Arguments
214     ///
215     /// * `addr` - Memory address to mmap at.
216     /// * `fd` - File descriptor to mmap from.
217     /// * `size` - Size of memory region in bytes.
218     /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
219     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
220     ///
221     /// # Safety
222     ///
223     /// This function should not be called before the caller unmaps any mmap'd regions already
224     /// present at `(addr..addr+size)`.
225     pub unsafe fn from_descriptor_offset_protection_fixed(
226         addr: *mut u8,
227         fd: &dyn AsRawDescriptor,
228         size: usize,
229         offset: u64,
230         prot: Protection,
231     ) -> Result<MemoryMapping> {
232         MemoryMapping::try_mmap(Some(addr), size, None, prot.into(), Some((fd, offset)))
233     }
234 
235     /// Helper wrapper around `try_mmap_populate` that does not request MAP_POPULATE.
236     unsafe fn try_mmap(
237         addr: Option<*mut u8>,
238         size: usize,
239         align: Option<u64>,
240         prot: c_int,
241         fd: Option<(&dyn AsRawDescriptor, u64)>,
242     ) -> Result<MemoryMapping> {
243         MemoryMapping::try_mmap_populate(addr, size, align, prot, fd, false)
244     }
245 
246     /// Helper wrapper around libc::mmap that does some basic validation, and calls
247     /// madvise with MADV_DONTDUMP on the created mmap
248     unsafe fn try_mmap_populate(
249         addr: Option<*mut u8>,
250         size: usize,
251         align: Option<u64>,
252         prot: c_int,
253         fd: Option<(&dyn AsRawDescriptor, u64)>,
254         populate: bool,
255     ) -> Result<MemoryMapping> {
256         let mut flags = libc::MAP_SHARED;
257         if populate {
258             flags |= libc::MAP_POPULATE;
259         }
260         // If addr is provided, set the (FIXED | NORESERVE) flag, and validate addr alignment.
261         let addr = match addr {
262             Some(addr) => {
263                 if (addr as usize) % pagesize() != 0 {
264                     return Err(Error::NotPageAligned);
265                 }
266                 flags |= libc::MAP_FIXED | libc::MAP_NORESERVE;
267                 addr as *mut libc::c_void
268             }
269             None => null_mut(),
270         };
271 
272         // mmap already aligns the returned address to the page size.
273         let align = if align.unwrap_or(0) == pagesize() as u64 {
274             Some(0)
275         } else {
276             align
277         };
278 
279         // Choose an explicit, aligned address if an alignment is requested.
280         let (addr, orig_addr, orig_size) = match align {
281             None | Some(0) => (addr, None, None),
282             Some(align) => {
283                 if !addr.is_null() || !align.is_power_of_two() {
284                     return Err(Error::InvalidAlignment);
285                 }
286                 let orig_size = size + align as usize;
287                 let orig_addr = libc::mmap64(
288                     null_mut(),
289                     orig_size,
290                     prot,
291                     libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS,
292                     -1,
293                     0,
294                 );
295                 if orig_addr == libc::MAP_FAILED {
296                     return Err(Error::SystemCallFailed(ErrnoError::last()));
297                 }
298 
299                 flags |= libc::MAP_FIXED;
300 
301                 let mask = align - 1;
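                // Rounding up: adding `mask` and clearing the low bits yields the first address
                // inside the over-allocated region that is a multiple of `align`.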
302                 (
303                     (orig_addr.wrapping_add(mask as usize) as u64 & !mask) as *mut libc::c_void,
304                     Some(orig_addr),
305                     Some(orig_size),
306                 )
307             }
308         };
309 
310         // If an fd is provided, validate that its offset is within bounds. Otherwise this is an
311         // anonymous mapping, so set the (ANONYMOUS | NORESERVE) flags.
312         let (fd, offset) = match fd {
313             Some((fd, offset)) => {
314                 if offset > libc::off64_t::max_value() as u64 {
315                     return Err(Error::InvalidOffset);
316                 }
317                 // Map privately when the fd has a write seal (read-only). See the upstream relaxation below.
318                 // - https://lore.kernel.org/bpf/20231013103208.kdffpyerufr4ygnw@quack3/T/
319                 // SAFETY:
320                 // Safe because no third parameter is expected and we check the return result.
321                 let seals = unsafe { libc::fcntl(fd.as_raw_descriptor(), libc::F_GET_SEALS) };
322                 if (seals >= 0) && (seals & libc::F_SEAL_WRITE != 0) {
323                     flags &= !libc::MAP_SHARED;
324                     flags |= libc::MAP_PRIVATE;
325                 }
326                 (fd.as_raw_descriptor(), offset as libc::off64_t)
327             }
328             None => {
329                 flags |= libc::MAP_ANONYMOUS | libc::MAP_NORESERVE;
330                 (-1, 0)
331             }
332         };
333         let addr = libc::mmap64(addr, size, prot, flags, fd, offset);
334         if addr == libc::MAP_FAILED {
335             return Err(Error::SystemCallFailed(ErrnoError::last()));
336         }
337 
338         // If an original mmap exists, we can now remove the unused regions
339         if let Some(orig_addr) = orig_addr {
340             let mut unmap_start = orig_addr as usize;
341             let mut unmap_end = addr as usize;
342             let mut unmap_size = unmap_end - unmap_start;
343 
344             if unmap_size > 0 {
345                 libc::munmap(orig_addr, unmap_size);
346             }
347 
348             unmap_start = addr as usize + size;
349             unmap_end = orig_addr as usize + orig_size.unwrap();
350             unmap_size = unmap_end - unmap_start;
351 
352             if unmap_size > 0 {
353                 libc::munmap(unmap_start as *mut libc::c_void, unmap_size);
354             }
355         }
356 
357         // This is safe because we call madvise with a valid address and size.
358         let _ = libc::madvise(addr, size, libc::MADV_DONTDUMP);
359 
360         // This is safe because KSM's only userspace visible effects are timing
361         // and memory consumption; it doesn't affect rust safety semantics.
362         // KSM is also disabled by default, and this flag is only a hint.
363         let _ = libc::madvise(addr, size, libc::MADV_MERGEABLE);
364 
365         Ok(MemoryMapping {
366             addr: addr as *mut u8,
367             size,
368         })
369     }
370 
371     /// Madvise the kernel to unmap on fork.
372     pub fn use_dontfork(&self) -> Result<()> {
373         // SAFETY:
374         // This is safe because we call madvise with a valid address and size, and we check the
375         // return value.
376         let ret = unsafe {
377             libc::madvise(
378                 self.as_ptr() as *mut libc::c_void,
379                 self.size(),
380                 libc::MADV_DONTFORK,
381             )
382         };
383         if ret == -1 {
384             Err(Error::SystemCallFailed(ErrnoError::last()))
385         } else {
386             Ok(())
387         }
388     }
389 
390     /// Madvise the kernel to use Huge Pages for this mapping.
391     pub fn use_hugepages(&self) -> Result<()> {
392         const SZ_2M: usize = 2 * 1024 * 1024;
393 
394         // THP uses 2M pages, so use THP only on mappings that are at least
395         // 2M in size.
396         if self.size() < SZ_2M {
397             return Ok(());
398         }
399 
400         // SAFETY:
401         // This is safe because we call madvise with a valid address and size, and we check the
402         // return value.
403         let ret = unsafe {
404             libc::madvise(
405                 self.as_ptr() as *mut libc::c_void,
406                 self.size(),
407                 libc::MADV_HUGEPAGE,
408             )
409         };
410         if ret == -1 {
411             Err(Error::SystemCallFailed(ErrnoError::last()))
412         } else {
413             Ok(())
414         }
415     }
416 
417     /// Calls msync with MS_SYNC on the mapping.
418     pub fn msync(&self) -> Result<()> {
419         // SAFETY:
420         // This is safe since we use the exact address and length of a known
421         // good memory mapping.
422         let ret = unsafe {
423             libc::msync(
424                 self.as_ptr() as *mut libc::c_void,
425                 self.size(),
426                 libc::MS_SYNC,
427             )
428         };
429         if ret == -1 {
430             return Err(Error::SystemCallFailed(ErrnoError::last()));
431         }
432         Ok(())
433     }
434 
435     /// Uses madvise to tell the kernel to remove the specified range.  Subsequent reads
436     /// to the pages in the range will return zero bytes.
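    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not from the original source):
    ///
    /// ```ignore
    /// let mapping = MemoryMapping::new(2 * pagesize()).expect("failed to mmap");
    /// // Discard the backing of the second page; subsequent reads of it return zeros.
    /// mapping.remove_range(pagesize(), pagesize()).expect("MADV_REMOVE failed");
    /// ```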
437     pub fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()> {
438         self.range_end(mem_offset, count)
439             .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
440         // SAFETY: Safe because all the args to madvise are valid and the return
441         // value is checked.
442         let ret = unsafe {
443             // madvising away the region is the same as the guest changing it.
444             // Next time it is read, it may return zero pages.
445             libc::madvise(
446                 (self.addr as usize + mem_offset) as *mut _,
447                 count,
448                 libc::MADV_REMOVE,
449             )
450         };
451         if ret < 0 {
452             Err(Error::SystemCallFailed(super::Error::last()))
453         } else {
454             Ok(())
455         }
456     }
457 
458     /// Tell the kernel to readahead the range.
459     ///
460     /// This does not block the thread waiting for I/O on the backing file. It does not
461     /// guarantee that the pages will remain resident unless they are mlock(2)ed by
462     /// `lock_on_fault_unchecked()`.
463     ///
464     /// The `mem_offset` and `count` must be validated by the caller.
465     ///
466     /// # Arguments
467     ///
468     /// * `mem_offset` - The offset of the head of the range.
469     /// * `count` - The size in bytes of the range.
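    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not from the original source); `file` stands for any
    /// descriptor that implements `AsRawDescriptor`:
    ///
    /// ```ignore
    /// let mapping = MemoryMapping::from_fd(&file, 4 * pagesize()).expect("failed to mmap");
    /// // Ask the kernel to start reading in the first page in the background.
    /// mapping.async_prefetch(0, pagesize()).expect("MADV_WILLNEED failed");
    /// ```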
470     pub fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()> {
471         // Validation
472         self.range_end(mem_offset, count)
473             .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
474         // SAFETY:
475         // Safe because populating the pages from the backed file does not affect the Rust memory
476         // safety.
477         let ret = unsafe {
478             libc::madvise(
479                 (self.addr as usize + mem_offset) as *mut _,
480                 count,
481                 libc::MADV_WILLNEED,
482             )
483         };
484         if ret < 0 {
485             Err(Error::SystemCallFailed(super::Error::last()))
486         } else {
487             Ok(())
488         }
489     }
490 
491     /// Tell the kernel to drop the page cache.
492     ///
493     /// This cannot be applied to locked pages.
494     ///
495     /// The `mem_offset` and `count` must be validated by the caller.
496     ///
497     /// NOTE: This function has destructive semantics. It throws away data in the page cache without
498     /// writing it to the backing file. If the data is important, the caller should ensure it is
499     /// written to disk before calling this function or should use MADV_PAGEOUT instead.
500     ///
501     /// # Arguments
502     ///
503     /// * `mem_offset` - The offset of the head of the range.
504     /// * `count` - The size in bytes of the range.
505     pub fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()> {
506         // Validation
507         self.range_end(mem_offset, count)
508             .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
509         // SAFETY:
510         // Safe because dropping the page cache does not affect the Rust memory safety.
511         let ret = unsafe {
512             libc::madvise(
513                 (self.addr as usize + mem_offset) as *mut _,
514                 count,
515                 libc::MADV_DONTNEED,
516             )
517         };
518         if ret < 0 {
519             Err(Error::SystemCallFailed(super::Error::last()))
520         } else {
521             Ok(())
522         }
523     }
524 
525     /// Lock the resident pages in the range so they are not swapped out.
526     ///
527     /// The remaining nonresident pages are locked when they are populated.
528     ///
529     /// The `mem_offset` and `count` must be validated by the caller.
530     ///
531     /// # Arguments
532     ///
533     /// * `mem_offset` - The offset of the head of the range.
534     /// * `count` - The size in bytes of the range.
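    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not from the original source):
    ///
    /// ```ignore
    /// let mapping = MemoryMapping::new(pagesize()).expect("failed to mmap");
    /// mapping.lock_on_fault(0, pagesize()).expect("mlock2 failed");
    /// // ... use the memory ...
    /// mapping.unlock(0, pagesize()).expect("munlock failed");
    /// ```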
535     pub fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()> {
536         // Validation
537         self.range_end(mem_offset, count)
538             .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
539         let addr = self.addr as usize + mem_offset;
540         // SAFETY:
541         // Safe because MLOCK_ONFAULT only affects the swap behavior of the kernel, so it has no
542         // impact on rust semantics.
543         // let ret = unsafe { libc::mlock2(addr as *mut _, count, libc::MLOCK_ONFAULT) };
544         // ANDROID(b/274805769): android glibc doesn't have mlock2, so we need to make the syscall directly.
545         let ret = unsafe {
546             libc::syscall(
547                 libc::SYS_mlock2,
548                 addr as *mut libc::c_void,
549                 count,
550                 libc::MLOCK_ONFAULT,
551             )
552         };
553         if ret < 0 {
554             let errno = super::Error::last();
555             warn!(
556                 "failed to mlock at {:#x} with length {}: {}",
557                 addr as u64,
558                 self.size(),
559                 errno,
560             );
561             Err(Error::SystemCallFailed(errno))
562         } else {
563             Ok(())
564         }
565     }
566 
567     /// Unlock the range of pages.
568     ///
569     /// Unlocking non-locked pages does not fail.
570     ///
571     /// The `mem_offset` and `count` must be validated by the caller.
572     ///
573     /// # Arguments
574     ///
575     /// * `mem_offset` - The offset of the head of the range.
576     /// * `count` - The size in bytes of the range.
577     pub fn unlock(&self, mem_offset: usize, count: usize) -> Result<()> {
578         // Validation
579         self.range_end(mem_offset, count)
580             .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
581         // SAFETY:
582         // Safe because munlock(2) does not affect the Rust memory safety.
583         let ret = unsafe { libc::munlock((self.addr as usize + mem_offset) as *mut _, count) };
584         if ret < 0 {
585             Err(Error::SystemCallFailed(super::Error::last()))
586         } else {
587             Ok(())
588         }
589     }
590 
591     // Check that offset+count is valid and return the sum.
592     pub(crate) fn range_end(&self, offset: usize, count: usize) -> Result<usize> {
593         let mem_end = offset.checked_add(count).ok_or(Error::InvalidAddress)?;
594         if mem_end > self.size() {
595             return Err(Error::InvalidAddress);
596         }
597         Ok(mem_end)
598     }
599 }
600 
601 // SAFETY:
602 // Safe because the pointer and size point to a memory range owned by this MemoryMapping that won't
603 // be unmapped until it's Dropped.
604 unsafe impl MappedRegion for MemoryMapping {
605     fn as_ptr(&self) -> *mut u8 {
606         self.addr
607     }
608 
609     fn size(&self) -> usize {
610         self.size
611     }
612 }
613 
614 impl Drop for MemoryMapping {
615     fn drop(&mut self) {
616         // SAFETY:
617         // This is safe because we mmap the area at addr ourselves, and nobody
618         // else is holding a reference to it.
619         unsafe {
620             libc::munmap(self.addr as *mut libc::c_void, self.size);
621         }
622     }
623 }
624 
625 /// Tracks fixed memory mappings within an anonymous memory-mapped, fixed-size arena
626 /// in the current process.
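///
/// # Example
///
/// A minimal illustrative sketch (not from the original source):
///
/// ```ignore
/// let mut arena = MemoryMappingArena::new(4 * pagesize()).expect("failed to reserve arena");
/// // Carve a read/write anonymous sub-mapping out of the reserved range.
/// arena.add_anon(0, 2 * pagesize()).expect("failed to add sub-mapping");
/// arena.remove(0, pagesize()).expect("failed to remove sub-mapping");
/// ```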
627 pub struct MemoryMappingArena {
628     addr: *mut u8,
629     size: usize,
630 }
631 
632 // SAFETY:
633 // Send and Sync aren't automatically inherited for the raw address pointer.
634 // Accessing that pointer is only done through the stateless interface which
635 // allows the object to be shared by multiple threads without a decrease in
636 // safety.
637 unsafe impl Send for MemoryMappingArena {}
638 // SAFETY: See safety comments for impl Send
639 unsafe impl Sync for MemoryMappingArena {}
640 
641 impl MemoryMappingArena {
642     /// Creates an mmap arena of `size` bytes.
643     ///
644     /// # Arguments
645     /// * `size` - Size of memory region in bytes.
646     pub fn new(size: usize) -> Result<MemoryMappingArena> {
647         // Reserve the arena's memory using an anonymous read-only mmap.
648         MemoryMapping::new_protection(size, None, Protection::read()).map(From::from)
649     }
650 
651     /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena
652     /// with `prot` protections. `offset` must be page aligned.
653     ///
654     /// # Arguments
655     /// * `offset` - Page aligned offset into the arena in bytes.
656     /// * `size` - Size of memory region in bytes.
657     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
658     pub fn add_anon_protection(
659         &mut self,
660         offset: usize,
661         size: usize,
662         prot: Protection,
663     ) -> Result<()> {
664         self.try_add(offset, size, prot, None)
665     }
666 
667     /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena.
668     /// `offset` must be page aligned.
669     ///
670     /// # Arguments
671     /// * `offset` - Page aligned offset into the arena in bytes.
672     /// * `size` - Size of memory region in bytes.
673     pub fn add_anon(&mut self, offset: usize, size: usize) -> Result<()> {
674         self.add_anon_protection(offset, size, Protection::read_write())
675     }
676 
677     /// Maps `size` bytes from the start of the given `fd` at `offset` bytes from
678     /// the start of the arena. `offset` must be page aligned.
679     ///
680     /// # Arguments
681     /// * `offset` - Page aligned offset into the arena in bytes.
682     /// * `size` - Size of memory region in bytes.
683     /// * `fd` - File descriptor to mmap from.
684     pub fn add_fd(&mut self, offset: usize, size: usize, fd: &dyn AsRawDescriptor) -> Result<()> {
685         self.add_fd_offset(offset, size, fd, 0)
686     }
687 
688     /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
689     /// at `offset` bytes from the start of the arena. `offset` must be page aligned.
690     ///
691     /// # Arguments
692     /// * `offset` - Page aligned offset into the arena in bytes.
693     /// * `size` - Size of memory region in bytes.
694     /// * `fd` - File descriptor to mmap from.
695     /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
696     pub fn add_fd_offset(
697         &mut self,
698         offset: usize,
699         size: usize,
700         fd: &dyn AsRawDescriptor,
701         fd_offset: u64,
702     ) -> Result<()> {
703         self.add_fd_offset_protection(offset, size, fd, fd_offset, Protection::read_write())
704     }
705 
706     /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
707     /// at `offset` bytes from the start of the arena with `prot` protections.
708     /// `offset` must be page aligned.
709     ///
710     /// # Arguments
711     /// * `offset` - Page aligned offset into the arena in bytes.
712     /// * `size` - Size of memory region in bytes.
713     /// * `fd` - File descriptor to mmap from.
714     /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
715     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
716     pub fn add_fd_offset_protection(
717         &mut self,
718         offset: usize,
719         size: usize,
720         fd: &dyn AsRawDescriptor,
721         fd_offset: u64,
722         prot: Protection,
723     ) -> Result<()> {
724         self.try_add(offset, size, prot, Some((fd, fd_offset)))
725     }
726 
727     /// Helper method that calls appropriate MemoryMapping constructor and adds
728     /// the resulting map into the arena.
729     fn try_add(
730         &mut self,
731         offset: usize,
732         size: usize,
733         prot: Protection,
734         fd: Option<(&dyn AsRawDescriptor, u64)>,
735     ) -> Result<()> {
736         // Ensure offset is page-aligned
737         if offset % pagesize() != 0 {
738             return Err(Error::NotPageAligned);
739         }
740         validate_includes_range(self.size(), offset, size)?;
741 
742         // SAFETY:
743         // This is safe since the range has been validated.
744         let mmap = unsafe {
745             match fd {
746                 Some((fd, fd_offset)) => MemoryMapping::from_descriptor_offset_protection_fixed(
747                     self.addr.add(offset),
748                     fd,
749                     size,
750                     fd_offset,
751                     prot,
752                 )?,
753                 None => MemoryMapping::new_protection_fixed(self.addr.add(offset), size, prot)?,
754             }
755         };
756 
757         // This mapping will get automatically removed when we drop the whole arena.
758         std::mem::forget(mmap);
759         Ok(())
760     }
761 
762     /// Removes `size` bytes at `offset` bytes from the start of the arena. `offset` must be page
763     /// aligned.
764     ///
765     /// # Arguments
766     /// * `offset` - Page aligned offset into the arena in bytes.
767     /// * `size` - Size of memory region in bytes.
768     pub fn remove(&mut self, offset: usize, size: usize) -> Result<()> {
769         self.try_add(offset, size, Protection::read(), None)
770     }
771 }
772 
773 // SAFETY:
774 // Safe because the pointer and size point to a memory range owned by this MemoryMappingArena that
775 // won't be unmapped until it's Dropped.
776 unsafe impl MappedRegion for MemoryMappingArena {
777     fn as_ptr(&self) -> *mut u8 {
778         self.addr
779     }
780 
781     fn size(&self) -> usize {
782         self.size
783     }
784 
785     fn add_fd_mapping(
786         &mut self,
787         offset: usize,
788         size: usize,
789         fd: &dyn AsRawDescriptor,
790         fd_offset: u64,
791         prot: Protection,
792     ) -> Result<()> {
793         self.add_fd_offset_protection(offset, size, fd, fd_offset, prot)
794     }
795 
796     fn remove_mapping(&mut self, offset: usize, size: usize) -> Result<()> {
797         self.remove(offset, size)
798     }
799 }
800 
801 impl From<MemoryMapping> for MemoryMappingArena {
802     fn from(mmap: MemoryMapping) -> Self {
803         let addr = mmap.as_ptr();
804         let size = mmap.size();
805 
806         // Forget the original mapping because the `MemoryMappingArena` will take care of calling
807         // `munmap` when it is dropped.
808         std::mem::forget(mmap);
809         MemoryMappingArena { addr, size }
810     }
811 }
812 
813 impl From<CrateMemoryMapping> for MemoryMappingArena {
814     fn from(mmap: CrateMemoryMapping) -> Self {
815         MemoryMappingArena::from(mmap.mapping)
816     }
817 }
818 
819 impl Drop for MemoryMappingArena {
820     fn drop(&mut self) {
821         // SAFETY:
822         // This is safe because we own this memory range, and nobody else is holding a reference to
823         // it.
824         unsafe {
825             libc::munmap(self.addr as *mut libc::c_void, self.size);
826         }
827     }
828 }
829 
830 impl CrateMemoryMapping {
831     pub fn use_dontfork(&self) -> Result<()> {
832         self.mapping.use_dontfork()
833     }
834 
835     pub fn use_hugepages(&self) -> Result<()> {
836         self.mapping.use_hugepages()
837     }
838 
839     pub fn from_raw_ptr(addr: RawDescriptor, size: usize) -> Result<CrateMemoryMapping> {
840         MemoryMapping::from_fd_offset(&Descriptor(addr), size, 0).map(|mapping| {
841             CrateMemoryMapping {
842                 mapping,
843                 _file_descriptor: None,
844             }
845         })
846     }
847 }
848 
849 pub trait MemoryMappingUnix {
850     /// Remove the specified range from the mapping.
851     fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()>;
852     /// Tell the kernel to readahead the range.
853     fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()>;
854     /// Tell the kernel to drop the page cache.
855     fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()>;
856     /// Lock the resident pages in the range so they are not swapped out.
857     fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()>;
858     /// Unlock the range of pages.
859     fn unlock(&self, mem_offset: usize, count: usize) -> Result<()>;
860     /// Disable host swap for this mapping.
861     fn lock_all(&self) -> Result<()>;
862 }
863 
864 impl MemoryMappingUnix for CrateMemoryMapping {
865     fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()> {
866         self.mapping.remove_range(mem_offset, count)
867     }
868     fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()> {
869         self.mapping.async_prefetch(mem_offset, count)
870     }
871     fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()> {
872         self.mapping.drop_page_cache(mem_offset, count)
873     }
874     fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()> {
875         self.mapping.lock_on_fault(mem_offset, count)
876     }
877     fn unlock(&self, mem_offset: usize, count: usize) -> Result<()> {
878         self.mapping.unlock(mem_offset, count)
879     }
880     fn lock_all(&self) -> Result<()> {
881         self.mapping.lock_on_fault(0, self.mapping.size())
882     }
883 }
884 
885 pub trait MemoryMappingBuilderUnix<'a> {
886     #[allow(clippy::wrong_self_convention)]
887     fn from_descriptor(self, descriptor: &'a dyn AsRawDescriptor) -> MemoryMappingBuilder;
888 }
889 
890 impl<'a> MemoryMappingBuilderUnix<'a> for MemoryMappingBuilder<'a> {
891     /// Build the memory mapping using the specified descriptor as the backing memory.
892     ///
893     /// Default: Create a new memory mapping.
894     #[allow(clippy::wrong_self_convention)]
895     fn from_descriptor(mut self, descriptor: &'a dyn AsRawDescriptor) -> MemoryMappingBuilder {
896         self.descriptor = Some(descriptor);
897         self
898     }
899 }
900 
901 impl<'a> MemoryMappingBuilder<'a> {
902     /// Request that the mapped pages are pre-populated
903     ///
904     /// Default: Do not populate
905     pub fn populate(mut self) -> MemoryMappingBuilder<'a> {
906         self.populate = true;
907         self
908     }
909 
910     /// Build a MemoryMapping from the provided options.
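    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not from the original source); `file` stands for any
    /// descriptor that implements `AsRawDescriptor`:
    ///
    /// ```ignore
    /// let mapping = MemoryMappingBuilder::new(pagesize())
    ///     .from_descriptor(&file)
    ///     .populate()
    ///     .build()
    ///     .expect("failed to build mapping");
    /// ```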
911     pub fn build(self) -> Result<CrateMemoryMapping> {
912         match self.descriptor {
913             None => {
914                 if self.populate {
915                     // Population not supported for new mmaps
916                     return Err(Error::InvalidArgument);
917                 }
918                 MemoryMappingBuilder::wrap(
919                     MemoryMapping::new_protection(
920                         self.size,
921                         self.align,
922                         self.protection.unwrap_or_else(Protection::read_write),
923                     )?,
924                     None,
925                 )
926             }
927             Some(descriptor) => MemoryMappingBuilder::wrap(
928                 MemoryMapping::from_fd_offset_protection_populate(
929                     descriptor,
930                     self.size,
931                     self.offset.unwrap_or(0),
932                     self.align.unwrap_or(0),
933                     self.protection.unwrap_or_else(Protection::read_write),
934                     self.populate,
935                 )?,
936                 None,
937             ),
938         }
939     }
940 
941     pub(crate) fn wrap(
942         mapping: MemoryMapping,
943         file_descriptor: Option<&'a dyn AsRawDescriptor>,
944     ) -> Result<CrateMemoryMapping> {
945         let file_descriptor = match file_descriptor {
946             Some(descriptor) => Some(
947                 SafeDescriptor::try_from(descriptor)
948                     .map_err(|_| Error::SystemCallFailed(ErrnoError::last()))?,
949             ),
950             None => None,
951         };
952         Ok(CrateMemoryMapping {
953             mapping,
954             _file_descriptor: file_descriptor,
955         })
956     }
957 }
958 
959 #[cfg(test)]
960 mod tests {
961     use tempfile::tempfile;
962 
963     use super::*;
964     use crate::descriptor::Descriptor;
965     use crate::VolatileMemory;
966     use crate::VolatileMemoryError;
967 
968     #[test]
969     fn basic_map() {
970         let m = MemoryMappingBuilder::new(1024).build().unwrap();
971         assert_eq!(1024, m.size());
972     }
973 
974     #[test]
975     fn map_invalid_size() {
976         let res = MemoryMappingBuilder::new(0).build().unwrap_err();
977         if let Error::SystemCallFailed(e) = res {
978             assert_eq!(e.errno(), libc::EINVAL);
979         } else {
980             panic!("unexpected error: {}", res);
981         }
982     }
983 
984     #[test]
985     fn map_invalid_fd() {
986         let fd = Descriptor(-1);
987         let res = MemoryMapping::from_fd(&fd, 1024).unwrap_err();
988         if let Error::SystemCallFailed(e) = res {
989             assert_eq!(e.errno(), libc::EBADF);
990         } else {
991             panic!("unexpected error: {}", res);
992         }
993     }
994 
995     #[test]
996     fn test_write_past_end() {
997         let m = MemoryMappingBuilder::new(5).build().unwrap();
998         let res = m.write_slice(&[1, 2, 3, 4, 5, 6], 0);
999         assert!(res.is_ok());
1000         assert_eq!(res.unwrap(), 5);
1001     }
1002 
1003     #[test]
1004     fn slice_size() {
1005         let m = MemoryMappingBuilder::new(5).build().unwrap();
1006         let s = m.get_slice(2, 3).unwrap();
1007         assert_eq!(s.size(), 3);
1008     }
1009 
1010     #[test]
1011     fn slice_addr() {
1012         let m = MemoryMappingBuilder::new(5).build().unwrap();
1013         let s = m.get_slice(2, 3).unwrap();
1014         // SAFETY: all addresses are known to exist.
1015         assert_eq!(s.as_ptr(), unsafe { m.as_ptr().offset(2) });
1016     }
1017 
1018     #[test]
1019     fn slice_overflow_error() {
1020         let m = MemoryMappingBuilder::new(5).build().unwrap();
1021         let res = m.get_slice(std::usize::MAX, 3).unwrap_err();
1022         assert_eq!(
1023             res,
1024             VolatileMemoryError::Overflow {
1025                 base: std::usize::MAX,
1026                 offset: 3,
1027             }
1028         );
1029     }
1030     #[test]
1031     fn slice_oob_error() {
1032         let m = MemoryMappingBuilder::new(5).build().unwrap();
1033         let res = m.get_slice(3, 3).unwrap_err();
1034         assert_eq!(res, VolatileMemoryError::OutOfBounds { addr: 6 });
1035     }
1036 
1037     #[test]
1038     fn from_fd_offset_invalid() {
1039         let fd = tempfile().unwrap();
1040         let res = MemoryMapping::from_fd_offset(&fd, 4096, (libc::off64_t::max_value() as u64) + 1)
1041             .unwrap_err();
1042         match res {
1043             Error::InvalidOffset => {}
1044             e => panic!("unexpected error: {}", e),
1045         }
1046     }
1047 
1048     #[test]
1049     fn arena_new() {
1050         let m = MemoryMappingArena::new(0x40000).unwrap();
1051         assert_eq!(m.size(), 0x40000);
1052     }
1053 
1054     #[test]
1055     fn arena_add() {
1056         let mut m = MemoryMappingArena::new(0x40000).unwrap();
1057         assert!(m.add_anon(0, pagesize() * 4).is_ok());
1058     }
1059 
1060     #[test]
1061     fn arena_remove() {
1062         let mut m = MemoryMappingArena::new(0x40000).unwrap();
1063         assert!(m.add_anon(0, pagesize() * 4).is_ok());
1064         assert!(m.remove(0, pagesize()).is_ok());
1065         assert!(m.remove(0, pagesize() * 2).is_ok());
1066     }
1067 
1068     #[test]
1069     fn arena_add_alignment_error() {
1070         let mut m = MemoryMappingArena::new(pagesize() * 2).unwrap();
1071         assert!(m.add_anon(0, 0x100).is_ok());
1072         let res = m.add_anon(pagesize() + 1, 0x100).unwrap_err();
1073         match res {
1074             Error::NotPageAligned => {}
1075             e => panic!("unexpected error: {}", e),
1076         }
1077     }
1078 
1079     #[test]
1080     fn arena_add_oob_error() {
1081         let mut m = MemoryMappingArena::new(pagesize()).unwrap();
1082         let res = m.add_anon(0, pagesize() + 1).unwrap_err();
1083         match res {
1084             Error::InvalidAddress => {}
1085             e => panic!("unexpected error: {}", e),
1086         }
1087     }
1088 
1089     #[test]
1090     fn arena_add_overlapping() {
1091         let ps = pagesize();
1092         let mut m =
1093             MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");
1094         m.add_anon(ps * 4, ps * 4)
1095             .expect("failed to add sub-mapping");
1096 
1097         // Overlap in the front.
1098         m.add_anon(ps * 2, ps * 3)
1099             .expect("failed to add front overlapping sub-mapping");
1100 
1101         // Overlap in the back.
1102         m.add_anon(ps * 7, ps * 3)
1103             .expect("failed to add back overlapping sub-mapping");
1104 
1105         // Overlap the back of the first mapping, all of the middle mapping, and the front of the
1106         // last mapping.
1107         m.add_anon(ps * 3, ps * 6)
1108             .expect("failed to add mapping that overlaps several mappings");
1109     }
1110 
1111     #[test]
1112     fn arena_remove_overlapping() {
1113         let ps = pagesize();
1114         let mut m =
1115             MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");
1116         m.add_anon(ps * 4, ps * 4)
1117             .expect("failed to add sub-mapping");
1118         m.add_anon(ps * 2, ps * 2)
1119             .expect("failed to add front overlapping sub-mapping");
1120         m.add_anon(ps * 8, ps * 2)
1121             .expect("failed to add back overlapping sub-mapping");
1122 
1123         // Remove the back of the first mapping and the front of the second.
1124         m.remove(ps * 3, ps * 2)
1125             .expect("failed to remove front overlapping mapping");
1126 
1127         // Remove the back of the second mapping and the front of the third.
1128         m.remove(ps * 7, ps * 2)
1129             .expect("failed to remove back overlapping mapping");
1130 
1131         // Remove a mapping that completely overlaps the middle mapping.
1132         m.remove(ps * 5, ps * 2)
1133             .expect("failed to remove fully overlapping mapping");
1134     }
1135 
1136     #[test]
1137     fn arena_remove_unaligned() {
1138         let ps = pagesize();
1139         let mut m =
1140             MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");
1141 
1142         m.add_anon(0, ps).expect("failed to add mapping");
1143         m.remove(0, ps - 1)
1144             .expect("failed to remove unaligned mapping");
1145     }
1146 
1147     #[test]
1148     fn arena_msync() {
1149         let size = 0x40000;
1150         let m = MemoryMappingArena::new(size).unwrap();
1151         let ps = pagesize();
1152         <dyn MappedRegion>::msync(&m, 0, ps).unwrap();
1153         <dyn MappedRegion>::msync(&m, 0, size).unwrap();
1154         <dyn MappedRegion>::msync(&m, ps, size - ps).unwrap();
1155         let res = <dyn MappedRegion>::msync(&m, ps, size).unwrap_err();
1156         match res {
1157             Error::InvalidAddress => {}
1158             e => panic!("unexpected error: {}", e),
1159         }
1160     }
1161 }
1162