1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
6 mod aarch64;
7 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8 pub use aarch64::*;
9 
10 #[cfg(target_arch = "riscv64")]
11 mod riscv64;
12 
13 #[cfg(target_arch = "x86_64")]
14 mod x86_64;
15 
16 use std::cmp::min;
17 use std::cmp::Reverse;
18 use std::collections::BTreeMap;
19 use std::collections::BinaryHeap;
20 use std::convert::TryFrom;
21 use std::ffi::CString;
22 use std::os::raw::c_ulong;
23 use std::os::raw::c_void;
24 use std::os::unix::prelude::OsStrExt;
25 use std::path::Path;
26 use std::path::PathBuf;
27 use std::ptr::copy_nonoverlapping;
28 use std::sync::Arc;
29 
30 use base::errno_result;
31 use base::error;
32 use base::ioctl;
33 use base::ioctl_with_mut_ref;
34 use base::ioctl_with_ref;
35 use base::ioctl_with_val;
36 use base::linux::MemoryMappingBuilderUnix;
37 use base::pagesize;
38 use base::AsRawDescriptor;
39 use base::Error;
40 use base::Event;
41 use base::FromRawDescriptor;
42 use base::MappedRegion;
43 use base::MemoryMapping;
44 use base::MemoryMappingBuilder;
45 use base::MmapError;
46 use base::Protection;
47 use base::RawDescriptor;
48 use base::Result;
49 use base::SafeDescriptor;
50 use data_model::vec_with_array_field;
51 use kvm_sys::*;
52 use libc::open64;
53 use libc::EFAULT;
54 use libc::EINVAL;
55 use libc::EIO;
56 use libc::ENOENT;
57 use libc::ENOSPC;
58 use libc::ENOSYS;
59 use libc::EOVERFLOW;
60 use libc::O_CLOEXEC;
61 use libc::O_RDWR;
62 #[cfg(target_arch = "riscv64")]
63 use riscv64::*;
64 use sync::Mutex;
65 use vm_memory::GuestAddress;
66 use vm_memory::GuestMemory;
67 #[cfg(target_arch = "x86_64")]
68 pub use x86_64::*;
69 
70 use crate::BalloonEvent;
71 use crate::ClockState;
72 use crate::Config;
73 use crate::Datamatch;
74 use crate::DeviceKind;
75 use crate::HypervHypercall;
76 use crate::Hypervisor;
77 use crate::HypervisorCap;
78 use crate::IoEventAddress;
79 use crate::IoOperation;
80 use crate::IoParams;
81 use crate::IrqRoute;
82 use crate::IrqSource;
83 use crate::MPState;
84 use crate::MemCacheType;
85 use crate::MemSlot;
86 use crate::Vcpu;
87 use crate::VcpuExit;
88 use crate::VcpuSignalHandle;
89 use crate::VcpuSignalHandleInner;
90 use crate::Vm;
91 use crate::VmCap;
92 
93 // Wrapper around KVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping
94 // from guest physical to host user pages.
95 //
96 // SAFETY:
97 // Safe when the guest regions are guaranteed not to overlap.
98 unsafe fn set_user_memory_region(
99     descriptor: &SafeDescriptor,
100     slot: MemSlot,
101     read_only: bool,
102     log_dirty_pages: bool,
103     cache: MemCacheType,
104     guest_addr: u64,
105     memory_size: u64,
106     userspace_addr: *mut u8,
107 ) -> Result<()> {
108     let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
109     if log_dirty_pages {
110         flags |= KVM_MEM_LOG_DIRTY_PAGES;
111     }
112     if cache == MemCacheType::CacheNonCoherent {
113         flags |= KVM_MEM_NON_COHERENT_DMA;
114     }
115     let region = kvm_userspace_memory_region {
116         slot,
117         flags,
118         guest_phys_addr: guest_addr,
119         memory_size,
120         userspace_addr: userspace_addr as u64,
121     };
122 
123     let ret = ioctl_with_ref(descriptor, KVM_SET_USER_MEMORY_REGION(), &region);
124     if ret == 0 {
125         Ok(())
126     } else {
127         errno_result()
128     }
129 }
130 
131 /// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
132 /// size.
133 ///
134 /// # Arguments
135 ///
136 /// * `size` - Number of bytes in the memory region being queried.
137 pub fn dirty_log_bitmap_size(size: usize) -> usize {
138     let page_size = pagesize();
139     (((size + page_size - 1) / page_size) + 7) / 8
140 }
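// Worked example (illustrative): with a 4 KiB page size, a 1 MiB region spans 256 pages,
// so the dirty log bitmap needs (256 + 7) / 8 = 32 bytes.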
141 
142 pub struct Kvm {
143     kvm: SafeDescriptor,
144 }
145 
146 pub type KvmCap = kvm::Cap;
147 
148 impl Kvm {
149     pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
150         let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
151         // SAFETY:
152         // Open calls are safe because we give a nul-terminated string and verify the result.
153         let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
154         if ret < 0 {
155             return errno_result();
156         }
157         // SAFETY:
158         // Safe because we verify that ret is valid and we own the fd.
159         let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
160 
161         // SAFETY:
162         // Safe because we know that the descriptor is valid and we verify the return result.
163         let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION()) };
164         if version < 0 {
165             return errno_result();
166         }
167 
168         // Per the kernel KVM API documentation: "Applications should refuse to run if
169         // KVM_GET_API_VERSION returns a value other than 12."
170         if version as u32 != KVM_API_VERSION {
171             error!(
172                 "KVM_GET_API_VERSION: expected {}, got {}",
173                 KVM_API_VERSION, version,
174             );
175             return Err(Error::new(ENOSYS));
176         }
177 
178         Ok(Kvm { kvm })
179     }
180 
181     /// Opens `/dev/kvm` and returns a Kvm object on success.
182     pub fn new() -> Result<Kvm> {
183         Kvm::new_with_path(&PathBuf::from("/dev/kvm"))
184     }
185 
186     /// Gets the size of the mmap required to use vcpu's `kvm_run` structure.
187     pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
188         // SAFETY:
189         // Safe because we know that our file is a KVM fd and we verify the return result.
190         let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) };
191         if res > 0 {
192             Ok(res as usize)
193         } else {
194             errno_result()
195         }
196     }
197 }
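// Illustrative usage sketch (not taken from this file): open the default `/dev/kvm` node and
// build a VM around it. The guest memory layout and the `Config::default()` value below are
// placeholder assumptions, not requirements of this API.
//
//     let kvm = Kvm::new()?;
//     let guest_mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)])?;
//     let vm = KvmVm::new(&kvm, guest_mem, Config::default())?;
//     let vcpu = vm.create_kvm_vcpu(0)?;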
198 
199 impl AsRawDescriptor for Kvm {
200     fn as_raw_descriptor(&self) -> RawDescriptor {
201         self.kvm.as_raw_descriptor()
202     }
203 }
204 
205 impl Hypervisor for Kvm {
206     fn try_clone(&self) -> Result<Self> {
207         Ok(Kvm {
208             kvm: self.kvm.try_clone()?,
209         })
210     }
211 
212     fn check_capability(&self, cap: HypervisorCap) -> bool {
213         if let Ok(kvm_cap) = KvmCap::try_from(cap) {
214             // SAFETY:
215             // this ioctl is safe because we know this kvm descriptor is valid,
216             // and we are copying over the kvm capability (u32) as a c_ulong value.
217             unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), kvm_cap as c_ulong) == 1 }
218         } else {
219             // this capability cannot be converted on this platform, so return false
220             false
221         }
222     }
223 }
224 
225 /// A wrapper around creating and using a KVM VM.
226 pub struct KvmVm {
227     kvm: Kvm,
228     vm: SafeDescriptor,
229     guest_mem: GuestMemory,
230     mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
231     /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
232     mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
233 }
234 
235 impl KvmVm {
236     /// Constructs a new `KvmVm` using the given `Kvm` instance.
237     pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
238         // SAFETY:
239         // Safe because we know kvm is a real kvm fd as this module is the only one that can make
240         // Kvm objects.
241         let ret = unsafe {
242             ioctl_with_val(
243                 kvm,
244                 KVM_CREATE_VM(),
245                 kvm.get_vm_type(cfg.protection_type)? as c_ulong,
246             )
247         };
248         if ret < 0 {
249             return errno_result();
250         }
251         // SAFETY:
252         // Safe because we verify that ret is valid and we own the fd.
253         let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
254         for region in guest_mem.regions() {
255             // SAFETY:
256             // Safe because the guest regions are guaranteed not to overlap.
257             unsafe {
258                 set_user_memory_region(
259                     &vm_descriptor,
260                     region.index as MemSlot,
261                     false,
262                     false,
263                     MemCacheType::CacheCoherent,
264                     region.guest_addr.offset(),
265                     region.size as u64,
266                     region.host_addr as *mut u8,
267                 )
268             }?;
269         }
270 
271         let vm = KvmVm {
272             kvm: kvm.try_clone()?,
273             vm: vm_descriptor,
274             guest_mem,
275             mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
276             mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
277         };
278         vm.init_arch(&cfg)?;
279         Ok(vm)
280     }
281 
282     pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
283         let run_mmap_size = self.kvm.get_vcpu_mmap_size()?;
284 
285         // SAFETY:
286         // Safe because we know that our file is a VM fd and we verify the return result.
287         let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU(), c_ulong::try_from(id).unwrap()) };
288         if fd < 0 {
289             return errno_result();
290         }
291 
292         // SAFETY:
293         // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
294         // the value of the fd and we own the fd.
295         let vcpu = unsafe { SafeDescriptor::from_raw_descriptor(fd) };
296 
297         // The VCPU mapping is held by an `Arc` inside `KvmVcpu`, and it can also be cloned by
298         // `signal_handle()` for use in `KvmVcpuSignalHandle`. The mapping will not be destroyed
299         // until all references are dropped, so it is safe to reference `kvm_run` fields via the
300         // `as_ptr()` function during either type's lifetime.
301         let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
302             .from_descriptor(&vcpu)
303             .build()
304             .map_err(|_| Error::new(ENOSPC))?;
305 
306         let cap_kvmclock_ctrl = self.check_raw_capability(KvmCap::KvmclockCtrl);
307 
308         Ok(KvmVcpu {
309             kvm: self.kvm.try_clone()?,
310             vm: self.vm.try_clone()?,
311             vcpu,
312             id,
313             cap_kvmclock_ctrl,
314             run_mmap: Arc::new(run_mmap),
315         })
316     }
317 
318     /// Creates an in-kernel interrupt controller.
319     ///
320     /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
321     pub fn create_irq_chip(&self) -> Result<()> {
322         // SAFETY:
323         // Safe because we know that our file is a VM fd and we verify the return result.
324         let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) };
325         if ret == 0 {
326             Ok(())
327         } else {
328             errno_result()
329         }
330     }
331 
332     /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
333     pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
334         let mut irq_level = kvm_irq_level::default();
335         irq_level.__bindgen_anon_1.irq = irq;
336         irq_level.level = active.into();
337 
338         // SAFETY:
339         // Safe because we know that our file is a VM fd, we know the kernel will only read the
340         // correct amount of memory from our pointer, and we verify the return result.
341         let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
342         if ret == 0 {
343             Ok(())
344         } else {
345             errno_result()
346         }
347     }
348 
349     /// Registers an event that will, when signalled, trigger the `gsi` irq. If `resample_evt`
350     /// is not `None`, it will be triggered when the irqchip is resampled.
351     pub fn register_irqfd(
352         &self,
353         gsi: u32,
354         evt: &Event,
355         resample_evt: Option<&Event>,
356     ) -> Result<()> {
357         let mut irqfd = kvm_irqfd {
358             fd: evt.as_raw_descriptor() as u32,
359             gsi,
360             ..Default::default()
361         };
362 
363         if let Some(r_evt) = resample_evt {
364             irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
365             irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
366         }
367 
368         // SAFETY:
369         // Safe because we know that our file is a VM fd, we know the kernel will only read the
370         // correct amount of memory from our pointer, and we verify the return result.
371         let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
372         if ret == 0 {
373             Ok(())
374         } else {
375             errno_result()
376         }
377     }
378 
379     /// Unregisters an event that was previously registered with
380     /// `register_irqfd`.
381     ///
382     /// The `evt` and `gsi` pair must be the same as the ones passed into
383     /// `register_irqfd`.
384     pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
385         let irqfd = kvm_irqfd {
386             fd: evt.as_raw_descriptor() as u32,
387             gsi,
388             flags: KVM_IRQFD_FLAG_DEASSIGN,
389             ..Default::default()
390         };
391         // SAFETY:
392         // Safe because we know that our file is a VM fd, we know the kernel will only read the
393         // correct amount of memory from our pointer, and we verify the return result.
394         let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
395         if ret == 0 {
396             Ok(())
397         } else {
398             errno_result()
399         }
400     }
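// Illustrative sketch (GSI number chosen arbitrarily): wire an `Event` to a GSI, let the guest
// run, then detach it with the same `gsi`/`evt` pair.
//
//     let evt = Event::new()?;
//     vm.register_irqfd(/* gsi= */ 32, &evt, None)?;
//     // ... signalling `evt` now injects the interrupt ...
//     vm.unregister_irqfd(32, &evt)?;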
401 
402     /// Sets the GSI routing table, replacing any table set with previous calls to
403     /// `set_gsi_routing`.
404     pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
405         let mut irq_routing =
406             vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
407         irq_routing[0].nr = routes.len() as u32;
408 
409         // SAFETY:
410         // Safe because we ensured there is enough space in irq_routing to hold the number of
411         // route entries.
412         let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
413         for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
414             *irq_route = kvm_irq_routing_entry::from(route);
415         }
416 
417         // TODO(b/315998194): Add safety comment
418         #[allow(clippy::undocumented_unsafe_blocks)]
419         let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), &irq_routing[0]) };
420         if ret == 0 {
421             Ok(())
422         } else {
423             errno_result()
424         }
425     }
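// Illustrative sketch (hypothetical GSI and MSI address/data values): replace the routing
// table with a single MSI route.
//
//     vm.set_gsi_routing(&[IrqRoute {
//         gsi: 5,
//         source: IrqSource::Msi {
//             address: 0xfee0_0000,
//             data: 0x4021,
//         },
//     }])?;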
426 
427     fn ioeventfd(
428         &self,
429         evt: &Event,
430         addr: IoEventAddress,
431         datamatch: Datamatch,
432         deassign: bool,
433     ) -> Result<()> {
434         let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
435             Datamatch::AnyLength => (false, 0, 0),
436             Datamatch::U8(v) => match v {
437                 Some(u) => (true, u as u64, 1),
438                 None => (false, 0, 1),
439             },
440             Datamatch::U16(v) => match v {
441                 Some(u) => (true, u as u64, 2),
442                 None => (false, 0, 2),
443             },
444             Datamatch::U32(v) => match v {
445                 Some(u) => (true, u as u64, 4),
446                 None => (false, 0, 4),
447             },
448             Datamatch::U64(v) => match v {
449                 Some(u) => (true, u, 8),
450                 None => (false, 0, 8),
451             },
452         };
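        // For example, `Datamatch::U32(Some(0x42))` yields (true, 0x42, 4): the eventfd only
        // fires on 4-byte writes of 0x42, while `Datamatch::AnyLength` (false, 0, 0) matches
        // writes of any length and value.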
453         let mut flags = 0;
454         if deassign {
455             flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
456         }
457         if do_datamatch {
458             flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
459         }
460         if let IoEventAddress::Pio(_) = addr {
461             flags |= 1 << kvm_ioeventfd_flag_nr_pio;
462         }
463         let ioeventfd = kvm_ioeventfd {
464             datamatch: datamatch_value,
465             len: datamatch_len,
466             addr: match addr {
467                 IoEventAddress::Pio(p) => p,
468                 IoEventAddress::Mmio(m) => m,
469             },
470             fd: evt.as_raw_descriptor(),
471             flags,
472             ..Default::default()
473         };
474         // SAFETY:
475         // Safe because we know that our file is a VM fd, we know the kernel will only read the
476         // correct amount of memory from our pointer, and we verify the return result.
477         let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
478         if ret == 0 {
479             Ok(())
480         } else {
481             errno_result()
482         }
483     }
484 
485     /// Checks whether a particular KVM-specific capability is available for this VM.
486     pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
487         // SAFETY:
488         // Safe because we know that our file is a KVM fd, and if the cap is invalid KVM assumes
489         // it's an unavailable extension and returns 0.
490         let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), capability as c_ulong) };
491         match capability {
492             #[cfg(target_arch = "x86_64")]
493             KvmCap::BusLockDetect => {
494                 if ret > 0 {
495                     ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
496                 } else {
497                     false
498                 }
499             }
500             _ => ret == 1,
501         }
502     }
503 
504     // Currently only used on aarch64, but works on any architecture.
505     #[allow(dead_code)]
506     /// Enables a KVM-specific capability for this VM, with the given arguments.
507     ///
508     /// # Safety
509     /// This function is marked as unsafe because `args` may be interpreted as pointers for some
510     /// capabilities. The caller must ensure that any pointers passed in the `args` array are
511     /// allocated as the kernel expects, and that mutable pointers are owned.
512     unsafe fn enable_raw_capability(
513         &self,
514         capability: KvmCap,
515         flags: u32,
516         args: &[u64; 4],
517     ) -> Result<()> {
518         let kvm_cap = kvm_enable_cap {
519             cap: capability as u32,
520             args: *args,
521             flags,
522             ..Default::default()
523         };
524         // SAFETY:
525         // Safe because we allocated the struct and we know the kernel will read exactly the size of
526         // the struct, and because we assume the caller has allocated the args appropriately.
527         let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), &kvm_cap);
528         if ret == 0 {
529             Ok(())
530         } else {
531             errno_result()
532         }
533     }
534 
535     fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
536         match self.guest_mem.remove_range(guest_address, size) {
537             Ok(_) => Ok(()),
538             Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
539             Err(_) => Err(Error::new(EIO)),
540         }
541     }
542 
543     fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
544         // No-op, when the guest attempts to access the pages again, Linux/KVM will provide them.
545         Ok(())
546     }
547 }
548 
549 impl Vm for KvmVm {
550     fn try_clone(&self) -> Result<Self> {
551         Ok(KvmVm {
552             kvm: self.kvm.try_clone()?,
553             vm: self.vm.try_clone()?,
554             guest_mem: self.guest_mem.clone(),
555             mem_regions: self.mem_regions.clone(),
556             mem_slot_gaps: self.mem_slot_gaps.clone(),
557         })
558     }
559 
560     fn check_capability(&self, c: VmCap) -> bool {
561         if let Some(val) = self.check_capability_arch(c) {
562             return val;
563         }
564         match c {
565             VmCap::DirtyLog => true,
566             VmCap::PvClock => false,
567             VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
568             VmCap::EarlyInitCpuid => false,
569             #[cfg(target_arch = "x86_64")]
570             VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
571             // When pKVM is the hypervisor, read-only memslots aren't supported, even for
572             // non-protected VMs.
573             VmCap::ReadOnlyMemoryRegion => !self.is_pkvm(),
574             VmCap::MemNoncoherentDma => {
575                 cfg!(feature = "noncoherent-dma")
576                     && self.check_raw_capability(KvmCap::MemNoncoherentDma)
577             }
578         }
579     }
580 
581     fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
582         match c {
583             #[cfg(target_arch = "x86_64")]
584             VmCap::BusLockDetect => {
585                 let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
586                 Ok(
587                     // TODO(b/315998194): Add safety comment
588                     #[allow(clippy::undocumented_unsafe_blocks)]
589                     unsafe {
590                         self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
591                     },
592                 )
593             }
594             _ => Ok(false),
595         }
596     }
597 
598     fn get_guest_phys_addr_bits(&self) -> u8 {
599         self.kvm.get_guest_phys_addr_bits()
600     }
601 
602     fn get_memory(&self) -> &GuestMemory {
603         &self.guest_mem
604     }
605 
606     fn add_memory_region(
607         &mut self,
608         guest_addr: GuestAddress,
609         mem: Box<dyn MappedRegion>,
610         read_only: bool,
611         log_dirty_pages: bool,
612         cache: MemCacheType,
613     ) -> Result<MemSlot> {
614         let pgsz = pagesize() as u64;
615         // KVM requires the user memory region size to be page-size aligned. It is safe to round
616         // mem.size() up to a page boundary here because the underlying mmap already rounds its
617         // size up to a whole page if it is not aligned.
618         let size = (mem.size() as u64 + pgsz - 1) / pgsz * pgsz;
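        // For example (illustrative): with a 4 KiB page size, a mem.size() of 0x1800 is rounded
        // up to a size of 0x2000, i.e. two full pages.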
619         let end_addr = guest_addr
620             .checked_add(size)
621             .ok_or_else(|| Error::new(EOVERFLOW))?;
622         if self.guest_mem.range_overlap(guest_addr, end_addr) {
623             return Err(Error::new(ENOSPC));
624         }
625         let mut regions = self.mem_regions.lock();
626         let mut gaps = self.mem_slot_gaps.lock();
627         let slot = match gaps.pop() {
628             Some(gap) => gap.0,
629             None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
630         };
631 
632         let cache_type = if self.check_capability(VmCap::MemNoncoherentDma) {
633             cache
634         } else {
635             MemCacheType::CacheCoherent
636         };
637 
638         // SAFETY:
639         // Safe because we check that the given guest address is valid and has no overlaps. We also
640         // know that the pointer and size are correct because the MemoryMapping interface ensures
641         // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
642         // is removed.
643         let res = unsafe {
644             set_user_memory_region(
645                 &self.vm,
646                 slot,
647                 read_only,
648                 log_dirty_pages,
649                 cache_type,
650                 guest_addr.offset(),
651                 size,
652                 mem.as_ptr(),
653             )
654         };
655 
656         if let Err(e) = res {
657             gaps.push(Reverse(slot));
658             return Err(e);
659         }
660         regions.insert(slot, mem);
661         Ok(slot)
662     }
663 
664     fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
665         let mut regions = self.mem_regions.lock();
666         let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;
667 
668         mem.msync(offset, size).map_err(|err| match err {
669             MmapError::InvalidAddress => Error::new(EFAULT),
670             MmapError::NotPageAligned => Error::new(EINVAL),
671             MmapError::SystemCallFailed(e) => e,
672             _ => Error::new(EIO),
673         })
674     }
675 
676     fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
677         let mut regions = self.mem_regions.lock();
678         if !regions.contains_key(&slot) {
679             return Err(Error::new(ENOENT));
680         }
681         // SAFETY:
682         // Safe because the slot is checked against the list of memory slots.
683         unsafe {
684             set_user_memory_region(
685                 &self.vm,
686                 slot,
687                 false,
688                 false,
689                 MemCacheType::CacheCoherent,
690                 0,
691                 0,
692                 std::ptr::null_mut(),
693             )?;
694         }
695         self.mem_slot_gaps.lock().push(Reverse(slot));
696         // This remove will always succeed because of the contains_key check above.
697         Ok(regions.remove(&slot).unwrap())
698     }
699 
700     fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
701         let device = if let Some(dev) = self.get_device_params_arch(kind) {
702             dev
703         } else {
704             match kind {
705                 DeviceKind::Vfio => kvm_create_device {
706                     type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
707                     fd: 0,
708                     flags: 0,
709                 },
710 
711                 // ARM and RISC-V have additional DeviceKinds, so they need the catch-all pattern.
712                 #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))]
713                 _ => return Err(Error::new(libc::ENXIO)),
714             }
715         };
716 
717         // SAFETY:
718         // Safe because we know that our file is a VM fd, we know the kernel will only write correct
719         // amount of memory to our pointer, and we verify the return result.
720         let ret = unsafe { base::ioctl_with_ref(self, KVM_CREATE_DEVICE(), &device) };
721         if ret == 0 {
722             Ok(
723                 // SAFETY:
724                 // Safe because we verify that ret is valid and we own the fd.
725                 unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) },
726             )
727         } else {
728             errno_result()
729         }
730     }
731 
732     fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
733         let regions = self.mem_regions.lock();
734         let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
735         // Ensure that dirty_log is large enough to hold one bit for every page in the mmap.
736         if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
737             return Err(Error::new(EINVAL));
738         }
739 
740         let mut dirty_log_kvm = kvm_dirty_log {
741             slot,
742             ..Default::default()
743         };
744         dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
745         // SAFETY:
746         // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid (because
747         // it's from a slice) and we checked that it will be large enough to hold the entire log.
748         let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirty_log_kvm) };
749         if ret == 0 {
750             Ok(())
751         } else {
752             errno_result()
753         }
754     }
755 
756     fn register_ioevent(
757         &mut self,
758         evt: &Event,
759         addr: IoEventAddress,
760         datamatch: Datamatch,
761     ) -> Result<()> {
762         self.ioeventfd(evt, addr, datamatch, false)
763     }
764 
765     fn unregister_ioevent(
766         &mut self,
767         evt: &Event,
768         addr: IoEventAddress,
769         datamatch: Datamatch,
770     ) -> Result<()> {
771         self.ioeventfd(evt, addr, datamatch, true)
772     }
773 
774     fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
775         // KVM delivers IO events in-kernel with ioeventfds, so this is a no-op
776         Ok(())
777     }
778 
779     fn get_pvclock(&self) -> Result<ClockState> {
780         self.get_pvclock_arch()
781     }
782 
783     fn set_pvclock(&self, state: &ClockState) -> Result<()> {
784         self.set_pvclock_arch(state)
785     }
786 
787     fn add_fd_mapping(
788         &mut self,
789         slot: u32,
790         offset: usize,
791         size: usize,
792         fd: &dyn AsRawDescriptor,
793         fd_offset: u64,
794         prot: Protection,
795     ) -> Result<()> {
796         let mut regions = self.mem_regions.lock();
797         let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;
798 
799         match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
800             Ok(()) => Ok(()),
801             Err(MmapError::SystemCallFailed(e)) => Err(e),
802             Err(_) => Err(Error::new(EIO)),
803         }
804     }
805 
806     fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
807         let mut regions = self.mem_regions.lock();
808         let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;
809 
810         match region.remove_mapping(offset, size) {
811             Ok(()) => Ok(()),
812             Err(MmapError::SystemCallFailed(e)) => Err(e),
813             Err(_) => Err(Error::new(EIO)),
814         }
815     }
816 
817     fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
818         match event {
819             BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
820             BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
821             BalloonEvent::BalloonTargetReached(_) => Ok(()),
822         }
823     }
824 }
825 
826 impl AsRawDescriptor for KvmVm {
827     fn as_raw_descriptor(&self) -> RawDescriptor {
828         self.vm.as_raw_descriptor()
829     }
830 }
831 
832 struct KvmVcpuSignalHandle {
833     run_mmap: Arc<MemoryMapping>,
834 }
835 
836 impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
837     fn signal_immediate_exit(&self) {
838         // SAFETY: we ensure `run_mmap` is a valid mapping of `kvm_run` at creation time, and the
839         // `Arc` ensures the mapping still exists while we hold a reference to it.
840         unsafe {
841             let run = self.run_mmap.as_ptr() as *mut kvm_run;
842             (*run).immediate_exit = 1;
843         }
844     }
845 }
846 
847 /// A wrapper around using a KVM Vcpu.
848 pub struct KvmVcpu {
849     kvm: Kvm,
850     vm: SafeDescriptor,
851     vcpu: SafeDescriptor,
852     id: usize,
853     cap_kvmclock_ctrl: bool,
854     run_mmap: Arc<MemoryMapping>,
855 }
856 
857 impl Vcpu for KvmVcpu {
858     fn try_clone(&self) -> Result<Self> {
859         let vm = self.vm.try_clone()?;
860         let vcpu = self.vcpu.try_clone()?;
861 
862         Ok(KvmVcpu {
863             kvm: self.kvm.try_clone()?,
864             vm,
865             vcpu,
866             cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
867             id: self.id,
868             run_mmap: self.run_mmap.clone(),
869         })
870     }
871 
872     fn as_vcpu(&self) -> &dyn Vcpu {
873         self
874     }
875 
876     fn id(&self) -> usize {
877         self.id
878     }
879 
880     #[allow(clippy::cast_ptr_alignment)]
881     fn set_immediate_exit(&self, exit: bool) {
882         // SAFETY:
883         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
884         // kernel told us how large it was. The pointer is page aligned so casting to a different
885         // type is well defined, hence the clippy allow attribute.
886         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
887         run.immediate_exit = exit.into();
888     }
889 
890     fn signal_handle(&self) -> VcpuSignalHandle {
891         VcpuSignalHandle {
892             inner: Box::new(KvmVcpuSignalHandle {
893                 run_mmap: self.run_mmap.clone(),
894             }),
895         }
896     }
897 
898     fn on_suspend(&self) -> Result<()> {
899         // On KVM implementations that use a paravirtualized clock (e.g. x86), a flag must be set to
900         // indicate to the guest kernel that a vCPU was suspended. The guest kernel will use this
901         // flag to prevent the soft lockup detection from triggering when this vCPU resumes, which
902         // could happen days later in realtime.
903         if self.cap_kvmclock_ctrl {
904             // SAFETY:
905             // The ioctl is safe because it does not read or write memory in this process.
906             if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL()) } != 0 {
907                 return errno_result();
908             }
909         }
910 
911         Ok(())
912     }
913 
914     unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
915         let kvm_cap = kvm_enable_cap {
916             cap,
917             args: *args,
918             ..Default::default()
919         };
920         // SAFETY:
921         // Safe because we allocated the struct and we know the kernel will read exactly the size of
922         // the struct, and because we assume the caller has allocated the args appropriately.
923         let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), &kvm_cap);
924         if ret == 0 {
925             Ok(())
926         } else {
927             errno_result()
928         }
929     }
930 
931     #[allow(clippy::cast_ptr_alignment)]
932     // The pointer is page aligned so casting to a different type is well defined, hence the clippy
933     // allow attribute.
934     fn run(&mut self) -> Result<VcpuExit> {
935         // SAFETY:
936         // Safe because we know that our file is a VCPU fd and we verify the return result.
937         let ret = unsafe { ioctl(self, KVM_RUN()) };
938         if ret != 0 {
939             return errno_result();
940         }
941 
942         // SAFETY:
943         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
944         // kernel told us how large it was.
945         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
946         match run.exit_reason {
947             KVM_EXIT_IO => Ok(VcpuExit::Io),
948             KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
949             KVM_EXIT_IOAPIC_EOI => {
950                 // SAFETY:
951                 // Safe because the exit_reason (which comes from the kernel) told us which
952                 // union field to use.
953                 let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
954                 Ok(VcpuExit::IoapicEoi { vector })
955             }
956             KVM_EXIT_HYPERV => Ok(VcpuExit::HypervHypercall),
957             KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
958             KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
959             KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
960             KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
961             KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
962             KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
963             KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
964             KVM_EXIT_FAIL_ENTRY => {
965                 // SAFETY:
966                 // Safe because the exit_reason (which comes from the kernel) told us which
967                 // union field to use.
968                 let hardware_entry_failure_reason = unsafe {
969                     run.__bindgen_anon_1
970                         .fail_entry
971                         .hardware_entry_failure_reason
972                 };
973                 Ok(VcpuExit::FailEntry {
974                     hardware_entry_failure_reason,
975                 })
976             }
977             KVM_EXIT_INTR => Ok(VcpuExit::Intr),
978             KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
979             KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
980             KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
981             KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
982             KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
983             KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
984             KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
985             KVM_EXIT_OSI => Ok(VcpuExit::Osi),
986             KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
987             KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
988             KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
989             KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
990             KVM_EXIT_EPR => Ok(VcpuExit::Epr),
991             KVM_EXIT_SYSTEM_EVENT => {
992                 // SAFETY:
993                 // Safe because we know the exit reason told us this union
994                 // field is valid
995                 let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
996                 let event_flags =
997                     // SAFETY:
998                     // Safe because we know the exit reason told us this union
999                     // field is valid
1000                     unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
1001                 match event_type {
1002                     KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
1003                     KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
1004                     KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
1005                     _ => {
1006                         error!(
1007                             "Unknown KVM system event {} with flags {}",
1008                             event_type, event_flags
1009                         );
1010                         Err(Error::new(EINVAL))
1011                     }
1012                 }
1013             }
1014             KVM_EXIT_X86_RDMSR => {
1015                 // SAFETY:
1016                 // Safe because the exit_reason (which comes from the kernel) told us which
1017                 // union field to use.
1018                 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1019                 let index = msr.index;
1020                 // By default, fail the MSR read; the error is cleared later if the read is handled.
1021                 msr.error = 1;
1022                 Ok(VcpuExit::RdMsr { index })
1023             }
1024             KVM_EXIT_X86_WRMSR => {
1025                 // SAFETY:
1026                 // Safe because the exit_reason (which comes from the kernel) told us which
1027                 // union field to use.
1028                 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1029                 // By default fail the MSR write.
1030                 msr.error = 1;
1031                 let index = msr.index;
1032                 let data = msr.data;
1033                 Ok(VcpuExit::WrMsr { index, data })
1034             }
1035             KVM_EXIT_X86_BUS_LOCK => Ok(VcpuExit::BusLock),
1036             #[cfg(target_arch = "riscv64")]
1037             KVM_EXIT_RISCV_SBI => {
1038                 // Safe because we trust the kernel to correctly fill in the union
1039                 let extension_id = unsafe { run.__bindgen_anon_1.riscv_sbi.extension_id };
1040                 let function_id = unsafe { run.__bindgen_anon_1.riscv_sbi.function_id };
1041                 let args = unsafe { run.__bindgen_anon_1.riscv_sbi.args };
1042                 Ok(VcpuExit::Sbi {
1043                     extension_id,
1044                     function_id,
1045                     args,
1046                 })
1047             }
1048             #[cfg(target_arch = "riscv64")]
1049             KVM_EXIT_RISCV_CSR => {
1050                 // Safe because we trust the kernel to correctly fill in the union
1051                 let csr_num = unsafe { run.__bindgen_anon_1.riscv_csr.csr_num };
1052                 let new_value = unsafe { run.__bindgen_anon_1.riscv_csr.new_value };
1053                 let write_mask = unsafe { run.__bindgen_anon_1.riscv_csr.write_mask };
1054                 let ret_value = unsafe { run.__bindgen_anon_1.riscv_csr.ret_value };
1055                 Ok(VcpuExit::RiscvCsr {
1056                     csr_num,
1057                     new_value,
1058                     write_mask,
1059                     ret_value,
1060                 })
1061             }
1062             r => panic!("unknown kvm exit reason: {}", r),
1063         }
1064     }
1065 
1066     fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
1067         // SAFETY:
1068         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1069         // kernel told us how large it was. The pointer is page aligned so casting to a different
1070         // type is well defined, hence the clippy allow attribute.
1071         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1072         // Verify that the handler is called in the right context.
1073         assert!(run.exit_reason == KVM_EXIT_MMIO);
1074         // SAFETY:
1075         // Safe because the exit_reason (which comes from the kernel) told us which
1076         // union field to use.
1077         let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
1078         let address = mmio.phys_addr;
1079         let size = min(mmio.len as usize, mmio.data.len());
1080         if mmio.is_write != 0 {
1081             handle_fn(IoParams {
1082                 address,
1083                 size,
1084                 operation: IoOperation::Write { data: mmio.data },
1085             });
1086             Ok(())
1087         } else if let Some(data) = handle_fn(IoParams {
1088             address,
1089             size,
1090             operation: IoOperation::Read,
1091         }) {
1092             mmio.data[..size].copy_from_slice(&data[..size]);
1093             Ok(())
1094         } else {
1095             Err(Error::new(EINVAL))
1096         }
1097     }
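// Illustrative handler sketch (device model elided; the read value is a placeholder): the
// closure receives the faulting address and size, consumes the data for a write, and returns
// up to 8 bytes for a read, of which only `size` are copied back into `kvm_run`.
//
//     vcpu.handle_mmio(&mut |IoParams { address, size, operation }| match operation {
//         IoOperation::Read => Some([0u8; 8]),
//         IoOperation::Write { data } => {
//             let _bytes_written = &data[..size]; // forward to the device backing `address`
//             None
//         }
//     })?;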
1098 
1099     fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
1100         // SAFETY:
1101         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1102         // kernel told us how large it was. The pointer is page aligned so casting to a different
1103         // type is well defined, hence the clippy allow attribute.
1104         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1105         // Verify that the handler is called in the right context.
1106         assert!(run.exit_reason == KVM_EXIT_IO);
1107         // SAFETY:
1108         // Safe because the exit_reason (which comes from the kernel) told us which
1109         // union field to use.
1110         let io = unsafe { run.__bindgen_anon_1.io };
1111         let size = usize::from(io.size);
1112 
1113         // SAFETY:
1114         // The data_offset is defined by the kernel to be some number of bytes into the kvm_run
1115         // structure, which we have fully mmap'd.
1116         let mut data_ptr = unsafe { (run as *mut kvm_run as *mut u8).add(io.data_offset as usize) };
1117 
1118         match io.direction as u32 {
1119             KVM_EXIT_IO_IN => {
1120                 for _ in 0..io.count {
1121                     if let Some(data) = handle_fn(IoParams {
1122                         address: io.port.into(),
1123                         size,
1124                         operation: IoOperation::Read,
1125                     }) {
1126                         // TODO(b/315998194): Add safety comment
1127                         #[allow(clippy::undocumented_unsafe_blocks)]
1128                         unsafe {
1129                             copy_nonoverlapping(data.as_ptr(), data_ptr, size);
1130                             data_ptr = data_ptr.add(size);
1131                         }
1132                     } else {
1133                         return Err(Error::new(EINVAL));
1134                     }
1135                 }
1136                 Ok(())
1137             }
1138             KVM_EXIT_IO_OUT => {
1139                 for _ in 0..io.count {
1140                     let mut data = [0; 8];
1141                     // TODO(b/315998194): Add safety comment
1142                     #[allow(clippy::undocumented_unsafe_blocks)]
1143                     unsafe {
1144                         copy_nonoverlapping(data_ptr, data.as_mut_ptr(), min(size, data.len()));
1145                         data_ptr = data_ptr.add(size);
1146                     }
1147                     handle_fn(IoParams {
1148                         address: io.port.into(),
1149                         size,
1150                         operation: IoOperation::Write { data },
1151                     });
1152                 }
1153                 Ok(())
1154             }
1155             _ => Err(Error::new(EINVAL)),
1156         }
1157     }
1158 
1159     fn handle_hyperv_hypercall(
1160         &self,
1161         handle_fn: &mut dyn FnMut(HypervHypercall) -> u64,
1162     ) -> Result<()> {
1163         // SAFETY:
1164         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1165         // kernel told us how large it was.
1166         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1167         // Verify that the handler is called in the right context.
1168         assert!(run.exit_reason == KVM_EXIT_HYPERV);
1169         // SAFETY:
1170         // Safe because the exit_reason (which comes from the kernel) told us which
1171         // union field to use.
1172         let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
1173         match hyperv.type_ {
1174             KVM_EXIT_HYPERV_SYNIC => {
1175                 // TODO(b/315998194): Add safety comment
1176                 #[allow(clippy::undocumented_unsafe_blocks)]
1177                 let synic = unsafe { &hyperv.u.synic };
1178                 handle_fn(HypervHypercall::HypervSynic {
1179                     msr: synic.msr,
1180                     control: synic.control,
1181                     evt_page: synic.evt_page,
1182                     msg_page: synic.msg_page,
1183                 });
1184                 Ok(())
1185             }
1186             KVM_EXIT_HYPERV_HCALL => {
1187                 // TODO(b/315998194): Add safety comment
1188                 #[allow(clippy::undocumented_unsafe_blocks)]
1189                 let hcall = unsafe { &mut hyperv.u.hcall };
1190                 hcall.result = handle_fn(HypervHypercall::HypervHcall {
1191                     input: hcall.input,
1192                     params: hcall.params,
1193                 });
1194                 Ok(())
1195             }
1196             _ => Err(Error::new(EINVAL)),
1197         }
1198     }
1199 
1200     fn handle_rdmsr(&self, data: u64) -> Result<()> {
1201         // SAFETY:
1202         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1203         // kernel told us how large it was.
1204         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1205         // Verify that the handler is called in the right context.
1206         assert!(run.exit_reason == KVM_EXIT_X86_RDMSR);
1207         // SAFETY:
1208         // Safe because the exit_reason (which comes from the kernel) told us which
1209         // union field to use.
1210         let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1211         msr.data = data;
1212         msr.error = 0;
1213         Ok(())
1214     }
1215 
1216     fn handle_wrmsr(&self) {
1217         // SAFETY:
1218         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1219         // kernel told us how large it was.
1220         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1221         // Verify that the handler is called in the right context.
1222         assert!(run.exit_reason == KVM_EXIT_X86_WRMSR);
1223         // SAFETY:
1224         // Safe because the exit_reason (which comes from the kernel) told us which
1225         // union field to use.
1226         let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1227         msr.error = 0;
1228     }
1229 }
1230 
1231 impl KvmVcpu {
1232     /// Gets the vcpu's current "multiprocessing state".
1233     ///
1234     /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
1235     /// a call to `Vm::create_irq_chip`.
1236     ///
1237     /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1238     /// to run crosvm on s390.
1239     pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
1240         // SAFETY: trivially safe
1241         let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
1242         let ret = {
1243             // SAFETY:
1244             // Safe because we know that our file is a VCPU fd, we know the kernel will only write
1245             // the correct amount of memory to our pointer, and we verify the return
1246             // result.
1247             unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut state) }
1248         };
1249         if ret < 0 {
1250             return errno_result();
1251         }
1252         Ok(state)
1253     }
1254 
1255     /// Sets the vcpu's current "multiprocessing state".
1256     ///
1257     /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
1258     /// a call to `Vm::create_irq_chip`.
1259     ///
1260     /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1261     /// to run crosvm on s390.
1262     pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
1263         let ret = {
1264             // SAFETY:
1265             // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
1266             unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), state) }
1267         };
1268         if ret < 0 {
1269             return errno_result();
1270         }
1271         Ok(())
1272     }
1273 }
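// Illustrative sketch: read the vCPU's multiprocessing state, convert it through the
// hypervisor-agnostic `MPState` type (see the `From` impls below), and write it back.
// Requires an in-kernel irqchip to have been created first.
//
//     let raw = vcpu.get_mp_state()?;
//     let state = MPState::from(&raw);
//     vcpu.set_mp_state(&kvm_mp_state::from(&state))?;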
1274 
1275 impl AsRawDescriptor for KvmVcpu {
1276     fn as_raw_descriptor(&self) -> RawDescriptor {
1277         self.vcpu.as_raw_descriptor()
1278     }
1279 }
1280 
1281 impl TryFrom<HypervisorCap> for KvmCap {
1282     type Error = Error;
1283 
1284     fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
1285         match cap {
1286             HypervisorCap::ArmPmuV3 => Ok(KvmCap::ArmPmuV3),
1287             HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
1288             HypervisorCap::S390UserSigp => Ok(KvmCap::S390UserSigp),
1289             HypervisorCap::TscDeadlineTimer => Ok(KvmCap::TscDeadlineTimer),
1290             HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
1291             #[cfg(target_arch = "x86_64")]
1292             HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
1293             #[cfg(target_arch = "x86_64")]
1294             HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
1295             HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
1296             HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
1297         }
1298     }
1299 }
1300 
1301 impl From<&IrqRoute> for kvm_irq_routing_entry {
1302     fn from(item: &IrqRoute) -> Self {
1303         match &item.source {
1304             IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
1305                 gsi: item.gsi,
1306                 type_: KVM_IRQ_ROUTING_IRQCHIP,
1307                 u: kvm_irq_routing_entry__bindgen_ty_1 {
1308                     irqchip: kvm_irq_routing_irqchip {
1309                         irqchip: chip_to_kvm_chip(*chip),
1310                         pin: *pin,
1311                     },
1312                 },
1313                 ..Default::default()
1314             },
1315             IrqSource::Msi { address, data } => kvm_irq_routing_entry {
1316                 gsi: item.gsi,
1317                 type_: KVM_IRQ_ROUTING_MSI,
1318                 u: kvm_irq_routing_entry__bindgen_ty_1 {
1319                     msi: kvm_irq_routing_msi {
1320                         address_lo: *address as u32,
1321                         address_hi: (*address >> 32) as u32,
1322                         data: *data,
1323                         ..Default::default()
1324                     },
1325                 },
1326                 ..Default::default()
1327             },
1328         }
1329     }
1330 }
1331 
1332 impl From<&kvm_mp_state> for MPState {
1333     fn from(item: &kvm_mp_state) -> Self {
1334         match item.mp_state {
1335             KVM_MP_STATE_RUNNABLE => MPState::Runnable,
1336             KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
1337             KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
1338             KVM_MP_STATE_HALTED => MPState::Halted,
1339             KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
1340             KVM_MP_STATE_STOPPED => MPState::Stopped,
1341             state => {
1342                 error!(
1343                     "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
1344                     state
1345                 );
1346                 MPState::Runnable
1347             }
1348         }
1349     }
1350 }
1351 
1352 impl From<&MPState> for kvm_mp_state {
1353     fn from(item: &MPState) -> Self {
1354         kvm_mp_state {
1355             mp_state: match item {
1356                 MPState::Runnable => KVM_MP_STATE_RUNNABLE,
1357                 MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
1358                 MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
1359                 MPState::Halted => KVM_MP_STATE_HALTED,
1360                 MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
1361                 MPState::Stopped => KVM_MP_STATE_STOPPED,
1362             },
1363         }
1364     }
1365 }
1366