1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
6 mod aarch64;
7 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8 pub use aarch64::*;
9
10 #[cfg(target_arch = "riscv64")]
11 mod riscv64;
12
13 #[cfg(target_arch = "x86_64")]
14 mod x86_64;
15
16 use std::cmp::min;
17 use std::cmp::Reverse;
18 use std::collections::BTreeMap;
19 use std::collections::BinaryHeap;
20 use std::convert::TryFrom;
21 use std::ffi::CString;
22 use std::os::raw::c_ulong;
23 use std::os::raw::c_void;
24 use std::os::unix::prelude::OsStrExt;
25 use std::path::Path;
26 use std::path::PathBuf;
27 use std::ptr::copy_nonoverlapping;
28 use std::sync::Arc;
29
30 use base::errno_result;
31 use base::error;
32 use base::ioctl;
33 use base::ioctl_with_mut_ref;
34 use base::ioctl_with_ref;
35 use base::ioctl_with_val;
36 use base::linux::MemoryMappingBuilderUnix;
37 use base::pagesize;
38 use base::AsRawDescriptor;
39 use base::Error;
40 use base::Event;
41 use base::FromRawDescriptor;
42 use base::MappedRegion;
43 use base::MemoryMapping;
44 use base::MemoryMappingBuilder;
45 use base::MmapError;
46 use base::Protection;
47 use base::RawDescriptor;
48 use base::Result;
49 use base::SafeDescriptor;
50 use data_model::vec_with_array_field;
51 use kvm_sys::*;
52 use libc::open64;
53 use libc::EFAULT;
54 use libc::EINVAL;
55 use libc::EIO;
56 use libc::ENOENT;
57 use libc::ENOSPC;
58 use libc::ENOSYS;
59 use libc::EOVERFLOW;
60 use libc::O_CLOEXEC;
61 use libc::O_RDWR;
62 #[cfg(target_arch = "riscv64")]
63 use riscv64::*;
64 use sync::Mutex;
65 use vm_memory::GuestAddress;
66 use vm_memory::GuestMemory;
67 #[cfg(target_arch = "x86_64")]
68 pub use x86_64::*;
69
70 use crate::BalloonEvent;
71 use crate::ClockState;
72 use crate::Config;
73 use crate::Datamatch;
74 use crate::DeviceKind;
75 use crate::HypervHypercall;
76 use crate::Hypervisor;
77 use crate::HypervisorCap;
78 use crate::IoEventAddress;
79 use crate::IoOperation;
80 use crate::IoParams;
81 use crate::IrqRoute;
82 use crate::IrqSource;
83 use crate::MPState;
84 use crate::MemCacheType;
85 use crate::MemSlot;
86 use crate::Vcpu;
87 use crate::VcpuExit;
88 use crate::VcpuSignalHandle;
89 use crate::VcpuSignalHandleInner;
90 use crate::Vm;
91 use crate::VmCap;
92
93 // Wrapper around KVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping
94 // from guest physical to host user pages.
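// Passing a `memory_size` of zero deletes the slot; `remove_memory_region` below relies on this
// to clear a mapping.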
95 //
96 // SAFETY:
97 // Safe when the guest regions are guaranteed not to overlap.
98 unsafe fn set_user_memory_region(
99 descriptor: &SafeDescriptor,
100 slot: MemSlot,
101 read_only: bool,
102 log_dirty_pages: bool,
103 cache: MemCacheType,
104 guest_addr: u64,
105 memory_size: u64,
106 userspace_addr: *mut u8,
107 ) -> Result<()> {
108 let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
109 if log_dirty_pages {
110 flags |= KVM_MEM_LOG_DIRTY_PAGES;
111 }
112 if cache == MemCacheType::CacheNonCoherent {
113 flags |= KVM_MEM_NON_COHERENT_DMA;
114 }
115 let region = kvm_userspace_memory_region {
116 slot,
117 flags,
118 guest_phys_addr: guest_addr,
119 memory_size,
120 userspace_addr: userspace_addr as u64,
121 };
122
123 let ret = ioctl_with_ref(descriptor, KVM_SET_USER_MEMORY_REGION(), &region);
124 if ret == 0 {
125 Ok(())
126 } else {
127 errno_result()
128 }
129 }
130
131 /// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
132 /// size.
133 ///
134 /// # Arguments
135 ///
136 /// * `size` - Number of bytes in the memory region being queried.
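///
/// # Example
///
/// On a host with 4 KiB pages, a 1 MiB region covers 256 pages; at one bit per page the bitmap
/// needs 256 / 8 = 32 bytes, so `dirty_log_bitmap_size(1 << 20)` returns 32.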
137 pub fn dirty_log_bitmap_size(size: usize) -> usize {
138 let page_size = pagesize();
139 (((size + page_size - 1) / page_size) + 7) / 8
140 }
141
142 pub struct Kvm {
143 kvm: SafeDescriptor,
144 }
145
146 pub type KvmCap = kvm::Cap;
147
148 impl Kvm {
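/// Opens the KVM device at `device_path` (normally `/dev/kvm`) and returns a `Kvm` object on
/// success.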
149 pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
150 let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
151 // SAFETY:
152 // Open calls are safe because we give a nul-terminated string and verify the result.
153 let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
154 if ret < 0 {
155 return errno_result();
156 }
157 // SAFETY:
158 // Safe because we verify that ret is valid and we own the fd.
159 let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
160
161 // SAFETY:
162 // Safe because we know that the descriptor is valid and we verify the return result.
163 let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION()) };
164 if version < 0 {
165 return errno_result();
166 }
167
168 // Per the kernel KVM API documentation: "Applications should refuse to run if
169 // KVM_GET_API_VERSION returns a value other than 12."
170 if version as u32 != KVM_API_VERSION {
171 error!(
172 "KVM_GET_API_VERSION: expected {}, got {}",
173 KVM_API_VERSION, version,
174 );
175 return Err(Error::new(ENOSYS));
176 }
177
178 Ok(Kvm { kvm })
179 }
180
181 /// Opens `/dev/kvm` and returns a Kvm object on success.
182 pub fn new() -> Result<Kvm> {
183 Kvm::new_with_path(&PathBuf::from("/dev/kvm"))
184 }
185
186 /// Gets the size of the mmap required to use vcpu's `kvm_run` structure.
187 pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
188 // SAFETY:
189 // Safe because we know that our file is a KVM fd and we verify the return result.
190 let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) };
191 if res > 0 {
192 Ok(res as usize)
193 } else {
194 errno_result()
195 }
196 }
197 }
198
199 impl AsRawDescriptor for Kvm {
200 fn as_raw_descriptor(&self) -> RawDescriptor {
201 self.kvm.as_raw_descriptor()
202 }
203 }
204
205 impl Hypervisor for Kvm {
206 fn try_clone(&self) -> Result<Self> {
207 Ok(Kvm {
208 kvm: self.kvm.try_clone()?,
209 })
210 }
211
212 fn check_capability(&self, cap: HypervisorCap) -> bool {
213 if let Ok(kvm_cap) = KvmCap::try_from(cap) {
214 // SAFETY:
215 // this ioctl is safe because we know this kvm descriptor is valid,
216 // and we are copying over the kvm capability (u32) as a c_ulong value.
217 unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), kvm_cap as c_ulong) == 1 }
218 } else {
219 // this capability cannot be converted on this platform, so return false
220 false
221 }
222 }
223 }
224
225 /// A wrapper around creating and using a KVM VM.
226 pub struct KvmVm {
227 kvm: Kvm,
228 vm: SafeDescriptor,
229 guest_mem: GuestMemory,
230 mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
231 /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
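/// (stored as `Reverse(slot)` so that `BinaryHeap::pop` returns the lowest freed slot first).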
232 mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
233 }
234
235 impl KvmVm {
236 /// Constructs a new `KvmVm` using the given `Kvm` instance.
237 pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
238 // SAFETY:
239 // Safe because we know kvm is a real kvm fd as this module is the only one that can make
240 // Kvm objects.
241 let ret = unsafe {
242 ioctl_with_val(
243 kvm,
244 KVM_CREATE_VM(),
245 kvm.get_vm_type(cfg.protection_type)? as c_ulong,
246 )
247 };
248 if ret < 0 {
249 return errno_result();
250 }
251 // SAFETY:
252 // Safe because we verify that ret is valid and we own the fd.
253 let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
254 for region in guest_mem.regions() {
255 // SAFETY:
256 // Safe because the guest regions are guaranteed not to overlap.
257 unsafe {
258 set_user_memory_region(
259 &vm_descriptor,
260 region.index as MemSlot,
261 false,
262 false,
263 MemCacheType::CacheCoherent,
264 region.guest_addr.offset(),
265 region.size as u64,
266 region.host_addr as *mut u8,
267 )
268 }?;
269 }
270
271 let vm = KvmVm {
272 kvm: kvm.try_clone()?,
273 vm: vm_descriptor,
274 guest_mem,
275 mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
276 mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
277 };
278 vm.init_arch(&cfg)?;
279 Ok(vm)
280 }
281
282 pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
283 let run_mmap_size = self.kvm.get_vcpu_mmap_size()?;
284
285 // SAFETY:
286 // Safe because we know that our file is a VM fd and we verify the return result.
287 let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU(), c_ulong::try_from(id).unwrap()) };
288 if fd < 0 {
289 return errno_result();
290 }
291
292 // SAFETY:
293 // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
294 // the value of the fd and we own the fd.
295 let vcpu = unsafe { SafeDescriptor::from_raw_descriptor(fd) };
296
297 // The VCPU mapping is held by an `Arc` inside `KvmVcpu`, and it can also be cloned by
298 // `signal_handle()` for use in `KvmVcpuSignalHandle`. The mapping will not be destroyed
299 // until all references are dropped, so it is safe to reference `kvm_run` fields via the
300 // `as_ptr()` function during either type's lifetime.
301 let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
302 .from_descriptor(&vcpu)
303 .build()
304 .map_err(|_| Error::new(ENOSPC))?;
305
306 let cap_kvmclock_ctrl = self.check_raw_capability(KvmCap::KvmclockCtrl);
307
308 Ok(KvmVcpu {
309 kvm: self.kvm.try_clone()?,
310 vm: self.vm.try_clone()?,
311 vcpu,
312 id,
313 cap_kvmclock_ctrl,
314 run_mmap: Arc::new(run_mmap),
315 })
316 }
317
318 /// Creates an in-kernel interrupt controller.
319 ///
320 /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
321 pub fn create_irq_chip(&self) -> Result<()> {
322 // SAFETY:
323 // Safe because we know that our file is a VM fd and we verify the return result.
324 let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) };
325 if ret == 0 {
326 Ok(())
327 } else {
328 errno_result()
329 }
330 }
331
332 /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
333 pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
334 let mut irq_level = kvm_irq_level::default();
335 irq_level.__bindgen_anon_1.irq = irq;
336 irq_level.level = active.into();
337
338 // SAFETY:
339 // Safe because we know that our file is a VM fd, we know the kernel will only read the
340 // correct amount of memory from our pointer, and we verify the return result.
341 let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
342 if ret == 0 {
343 Ok(())
344 } else {
345 errno_result()
346 }
347 }
348
349 /// Registers an event that will, when signalled, trigger the `gsi` irq. If `resample_evt` is
350 /// not `None`, it will be triggered when the irqchip is resampled.
351 pub fn register_irqfd(
352 &self,
353 gsi: u32,
354 evt: &Event,
355 resample_evt: Option<&Event>,
356 ) -> Result<()> {
357 let mut irqfd = kvm_irqfd {
358 fd: evt.as_raw_descriptor() as u32,
359 gsi,
360 ..Default::default()
361 };
362
363 if let Some(r_evt) = resample_evt {
364 irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
365 irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
366 }
367
368 // SAFETY:
369 // Safe because we know that our file is a VM fd, we know the kernel will only read the
370 // correct amount of memory from our pointer, and we verify the return result.
371 let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
372 if ret == 0 {
373 Ok(())
374 } else {
375 errno_result()
376 }
377 }
378
379 /// Unregisters an event that was previously registered with
380 /// `register_irqfd`.
381 ///
382 /// The `evt` and `gsi` pair must be the same as the ones passed into
383 /// `register_irqfd`.
384 pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
385 let irqfd = kvm_irqfd {
386 fd: evt.as_raw_descriptor() as u32,
387 gsi,
388 flags: KVM_IRQFD_FLAG_DEASSIGN,
389 ..Default::default()
390 };
391 // SAFETY:
392 // Safe because we know that our file is a VM fd, we know the kernel will only read the
393 // correct amount of memory from our pointer, and we verify the return result.
394 let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
395 if ret == 0 {
396 Ok(())
397 } else {
398 errno_result()
399 }
400 }
401
402 /// Sets the GSI routing table, replacing any table set with previous calls to
403 /// `set_gsi_routing`.
404 pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
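// `kvm_irq_routing` ends in a flexible array member, so allocate a buffer of the header type
// with enough trailing space to hold `routes.len()` routing entries.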
405 let mut irq_routing =
406 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
407 irq_routing[0].nr = routes.len() as u32;
408
409 // SAFETY:
410 // Safe because we ensured there is enough space in irq_routing to hold the number of
411 // route entries.
412 let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
413 for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
414 *irq_route = kvm_irq_routing_entry::from(route);
415 }
416
417 // TODO(b/315998194): Add safety comment
418 #[allow(clippy::undocumented_unsafe_blocks)]
419 let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), &irq_routing[0]) };
420 if ret == 0 {
421 Ok(())
422 } else {
423 errno_result()
424 }
425 }
426
427 fn ioeventfd(
428 &self,
429 evt: &Event,
430 addr: IoEventAddress,
431 datamatch: Datamatch,
432 deassign: bool,
433 ) -> Result<()> {
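// Translate the portable `Datamatch` into the (datamatch enabled, value, length in bytes)
// triple expected by the kvm_ioeventfd ABI; `None` means "match any value of this length".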
434 let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
435 Datamatch::AnyLength => (false, 0, 0),
436 Datamatch::U8(v) => match v {
437 Some(u) => (true, u as u64, 1),
438 None => (false, 0, 1),
439 },
440 Datamatch::U16(v) => match v {
441 Some(u) => (true, u as u64, 2),
442 None => (false, 0, 2),
443 },
444 Datamatch::U32(v) => match v {
445 Some(u) => (true, u as u64, 4),
446 None => (false, 0, 4),
447 },
448 Datamatch::U64(v) => match v {
449 Some(u) => (true, u, 8),
450 None => (false, 0, 8),
451 },
452 };
453 let mut flags = 0;
454 if deassign {
455 flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
456 }
457 if do_datamatch {
458 flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
459 }
460 if let IoEventAddress::Pio(_) = addr {
461 flags |= 1 << kvm_ioeventfd_flag_nr_pio;
462 }
463 let ioeventfd = kvm_ioeventfd {
464 datamatch: datamatch_value,
465 len: datamatch_len,
466 addr: match addr {
467 IoEventAddress::Pio(p) => p,
468 IoEventAddress::Mmio(m) => m,
469 },
470 fd: evt.as_raw_descriptor(),
471 flags,
472 ..Default::default()
473 };
474 // SAFETY:
475 // Safe because we know that our file is a VM fd, we know the kernel will only read the
476 // correct amount of memory from our pointer, and we verify the return result.
477 let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
478 if ret == 0 {
479 Ok(())
480 } else {
481 errno_result()
482 }
483 }
484
485 /// Checks whether a particular KVM-specific capability is available for this VM.
486 pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
487 // SAFETY:
488 // Safe because we know that our file is a KVM fd, and if the cap is invalid KVM assumes
489 // it's an unavailable extension and returns 0.
490 let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), capability as c_ulong) };
491 match capability {
492 #[cfg(target_arch = "x86_64")]
493 KvmCap::BusLockDetect => {
494 if ret > 0 {
495 ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
496 } else {
497 false
498 }
499 }
500 _ => ret == 1,
501 }
502 }
503
504 // Currently only used on aarch64, but works on any architecture.
505 #[allow(dead_code)]
506 /// Enables a KVM-specific capability for this VM, with the given arguments.
507 ///
508 /// # Safety
509 /// This function is marked as unsafe because `args` may be interpreted as pointers for some
510 /// capabilities. The caller must ensure that any pointers passed in the `args` array are
511 /// allocated as the kernel expects, and that mutable pointers are owned.
512 unsafe fn enable_raw_capability(
513 &self,
514 capability: KvmCap,
515 flags: u32,
516 args: &[u64; 4],
517 ) -> Result<()> {
518 let kvm_cap = kvm_enable_cap {
519 cap: capability as u32,
520 args: *args,
521 flags,
522 ..Default::default()
523 };
524 // SAFETY:
525 // Safe because we allocated the struct and we know the kernel will read exactly the size of
526 // the struct, and because we assume the caller has allocated the args appropriately.
527 let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), &kvm_cap);
528 if ret == 0 {
529 Ok(())
530 } else {
531 errno_result()
532 }
533 }
534
535 fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
536 match self.guest_mem.remove_range(guest_address, size) {
537 Ok(_) => Ok(()),
538 Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
539 Err(_) => Err(Error::new(EIO)),
540 }
541 }
542
543 fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
544 // No-op; when the guest attempts to access the pages again, Linux/KVM will provide them.
545 Ok(())
546 }
547 }
548
549 impl Vm for KvmVm {
550 fn try_clone(&self) -> Result<Self> {
551 Ok(KvmVm {
552 kvm: self.kvm.try_clone()?,
553 vm: self.vm.try_clone()?,
554 guest_mem: self.guest_mem.clone(),
555 mem_regions: self.mem_regions.clone(),
556 mem_slot_gaps: self.mem_slot_gaps.clone(),
557 })
558 }
559
560 fn check_capability(&self, c: VmCap) -> bool {
561 if let Some(val) = self.check_capability_arch(c) {
562 return val;
563 }
564 match c {
565 VmCap::DirtyLog => true,
566 VmCap::PvClock => false,
567 VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
568 VmCap::EarlyInitCpuid => false,
569 #[cfg(target_arch = "x86_64")]
570 VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
571 // When pKVM is the hypervisor, read-only memslots aren't supported, even for
572 // non-protected VMs.
573 VmCap::ReadOnlyMemoryRegion => !self.is_pkvm(),
574 VmCap::MemNoncoherentDma => {
575 cfg!(feature = "noncoherent-dma")
576 && self.check_raw_capability(KvmCap::MemNoncoherentDma)
577 }
578 }
579 }
580
581 fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
582 match c {
583 #[cfg(target_arch = "x86_64")]
584 VmCap::BusLockDetect => {
585 let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
586 Ok(
587 // TODO(b/315998194): Add safety comment
588 #[allow(clippy::undocumented_unsafe_blocks)]
589 unsafe {
590 self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
591 },
592 )
593 }
594 _ => Ok(false),
595 }
596 }
597
598 fn get_guest_phys_addr_bits(&self) -> u8 {
599 self.kvm.get_guest_phys_addr_bits()
600 }
601
602 fn get_memory(&self) -> &GuestMemory {
603 &self.guest_mem
604 }
605
606 fn add_memory_region(
607 &mut self,
608 guest_addr: GuestAddress,
609 mem: Box<dyn MappedRegion>,
610 read_only: bool,
611 log_dirty_pages: bool,
612 cache: MemCacheType,
613 ) -> Result<MemSlot> {
614 let pgsz = pagesize() as u64;
615 // KVM requires the user memory region size to be page-size aligned. It is safe to round
616 // mem.size() up to the next page boundary because mmap rounds the mapping size up to page
617 // alignment anyway if it is not already aligned.
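// For example, with 4 KiB pages a 6000-byte mapping is registered as 8192 bytes.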
618 let size = (mem.size() as u64 + pgsz - 1) / pgsz * pgsz;
619 let end_addr = guest_addr
620 .checked_add(size)
621 .ok_or_else(|| Error::new(EOVERFLOW))?;
622 if self.guest_mem.range_overlap(guest_addr, end_addr) {
623 return Err(Error::new(ENOSPC));
624 }
625 let mut regions = self.mem_regions.lock();
626 let mut gaps = self.mem_slot_gaps.lock();
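// Prefer re-using the lowest previously freed slot (the heap stores `Reverse(slot)`, so `pop`
// yields the minimum); otherwise use the next index past the static guest memory regions and
// any regions added so far.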
627 let slot = match gaps.pop() {
628 Some(gap) => gap.0,
629 None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
630 };
631
632 let cache_type = if self.check_capability(VmCap::MemNoncoherentDma) {
633 cache
634 } else {
635 MemCacheType::CacheCoherent
636 };
637
638 // SAFETY:
639 // Safe because we check that the given guest address is valid and has no overlaps. We also
640 // know that the pointer and size are correct because the MemoryMapping interface ensures
641 // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
642 // is removed.
643 let res = unsafe {
644 set_user_memory_region(
645 &self.vm,
646 slot,
647 read_only,
648 log_dirty_pages,
649 cache_type,
650 guest_addr.offset(),
651 size,
652 mem.as_ptr(),
653 )
654 };
655
656 if let Err(e) = res {
657 gaps.push(Reverse(slot));
658 return Err(e);
659 }
660 regions.insert(slot, mem);
661 Ok(slot)
662 }
663
664 fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
665 let mut regions = self.mem_regions.lock();
666 let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;
667
668 mem.msync(offset, size).map_err(|err| match err {
669 MmapError::InvalidAddress => Error::new(EFAULT),
670 MmapError::NotPageAligned => Error::new(EINVAL),
671 MmapError::SystemCallFailed(e) => e,
672 _ => Error::new(EIO),
673 })
674 }
675
676 fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
677 let mut regions = self.mem_regions.lock();
678 if !regions.contains_key(&slot) {
679 return Err(Error::new(ENOENT));
680 }
681 // SAFETY:
682 // Safe because the slot is checked against the list of memory slots.
683 unsafe {
684 set_user_memory_region(
685 &self.vm,
686 slot,
687 false,
688 false,
689 MemCacheType::CacheCoherent,
690 0,
691 0,
692 std::ptr::null_mut(),
693 )?;
694 }
695 self.mem_slot_gaps.lock().push(Reverse(slot));
696 // This remove will always succeed because of the contains_key check above.
697 Ok(regions.remove(&slot).unwrap())
698 }
699
700 fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
701 let device = if let Some(dev) = self.get_device_params_arch(kind) {
702 dev
703 } else {
704 match kind {
705 DeviceKind::Vfio => kvm_create_device {
706 type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
707 fd: 0,
708 flags: 0,
709 },
710
711 // ARM and RISC-V have additional DeviceKinds, so they need the catch-all pattern.
712 #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))]
713 _ => return Err(Error::new(libc::ENXIO)),
714 }
715 };
716
717 // SAFETY:
718 // Safe because we know that our file is a VM fd, we know the kernel will only write the correct
719 // amount of memory to our pointer, and we verify the return result.
720 let ret = unsafe { base::ioctl_with_ref(self, KVM_CREATE_DEVICE(), &device) };
721 if ret == 0 {
722 Ok(
723 // SAFETY:
724 // Safe because we verify that ret is valid and we own the fd.
725 unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) },
726 )
727 } else {
728 errno_result()
729 }
730 }
731
732 fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
733 let regions = self.mem_regions.lock();
734 let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
735 // Ensure that dirty_log is large enough to hold one bit for every page in the mmap.
736 if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
737 return Err(Error::new(EINVAL));
738 }
739
740 let mut dirty_log_kvm = kvm_dirty_log {
741 slot,
742 ..Default::default()
743 };
744 dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
745 // SAFETY:
746 // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid (because
747 // it's from a slice) and we checked that it will be large enough to hold the entire log.
748 let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirty_log_kvm) };
749 if ret == 0 {
750 Ok(())
751 } else {
752 errno_result()
753 }
754 }
755
756 fn register_ioevent(
757 &mut self,
758 evt: &Event,
759 addr: IoEventAddress,
760 datamatch: Datamatch,
761 ) -> Result<()> {
762 self.ioeventfd(evt, addr, datamatch, false)
763 }
764
765 fn unregister_ioevent(
766 &mut self,
767 evt: &Event,
768 addr: IoEventAddress,
769 datamatch: Datamatch,
770 ) -> Result<()> {
771 self.ioeventfd(evt, addr, datamatch, true)
772 }
773
774 fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
775 // KVM delivers IO events in-kernel with ioeventfds, so this is a no-op
776 Ok(())
777 }
778
779 fn get_pvclock(&self) -> Result<ClockState> {
780 self.get_pvclock_arch()
781 }
782
783 fn set_pvclock(&self, state: &ClockState) -> Result<()> {
784 self.set_pvclock_arch(state)
785 }
786
787 fn add_fd_mapping(
788 &mut self,
789 slot: u32,
790 offset: usize,
791 size: usize,
792 fd: &dyn AsRawDescriptor,
793 fd_offset: u64,
794 prot: Protection,
795 ) -> Result<()> {
796 let mut regions = self.mem_regions.lock();
797 let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;
798
799 match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
800 Ok(()) => Ok(()),
801 Err(MmapError::SystemCallFailed(e)) => Err(e),
802 Err(_) => Err(Error::new(EIO)),
803 }
804 }
805
806 fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
807 let mut regions = self.mem_regions.lock();
808 let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;
809
810 match region.remove_mapping(offset, size) {
811 Ok(()) => Ok(()),
812 Err(MmapError::SystemCallFailed(e)) => Err(e),
813 Err(_) => Err(Error::new(EIO)),
814 }
815 }
816
817 fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
818 match event {
819 BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
820 BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
821 BalloonEvent::BalloonTargetReached(_) => Ok(()),
822 }
823 }
824 }
825
826 impl AsRawDescriptor for KvmVm {
827 fn as_raw_descriptor(&self) -> RawDescriptor {
828 self.vm.as_raw_descriptor()
829 }
830 }
831
832 struct KvmVcpuSignalHandle {
833 run_mmap: Arc<MemoryMapping>,
834 }
835
836 impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
837 fn signal_immediate_exit(&self) {
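// Setting `immediate_exit` makes the vCPU thread's next KVM_RUN return to userspace with
// EINTR instead of entering the guest.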
838 // SAFETY: we ensure `run_mmap` is a valid mapping of `kvm_run` at creation time, and the
839 // `Arc` ensures the mapping still exists while we hold a reference to it.
840 unsafe {
841 let run = self.run_mmap.as_ptr() as *mut kvm_run;
842 (*run).immediate_exit = 1;
843 }
844 }
845 }
846
847 /// A wrapper around using a KVM Vcpu.
848 pub struct KvmVcpu {
849 kvm: Kvm,
850 vm: SafeDescriptor,
851 vcpu: SafeDescriptor,
852 id: usize,
853 cap_kvmclock_ctrl: bool,
854 run_mmap: Arc<MemoryMapping>,
855 }
856
857 impl Vcpu for KvmVcpu {
858 fn try_clone(&self) -> Result<Self> {
859 let vm = self.vm.try_clone()?;
860 let vcpu = self.vcpu.try_clone()?;
861
862 Ok(KvmVcpu {
863 kvm: self.kvm.try_clone()?,
864 vm,
865 vcpu,
866 cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
867 id: self.id,
868 run_mmap: self.run_mmap.clone(),
869 })
870 }
871
872 fn as_vcpu(&self) -> &dyn Vcpu {
873 self
874 }
875
876 fn id(&self) -> usize {
877 self.id
878 }
879
880 #[allow(clippy::cast_ptr_alignment)]
881 fn set_immediate_exit(&self, exit: bool) {
882 // SAFETY:
883 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
884 // kernel told us how large it was. The pointer is page aligned so casting to a different
885 // type is well defined, hence the clippy allow attribute.
886 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
887 run.immediate_exit = exit.into();
888 }
889
890 fn signal_handle(&self) -> VcpuSignalHandle {
891 VcpuSignalHandle {
892 inner: Box::new(KvmVcpuSignalHandle {
893 run_mmap: self.run_mmap.clone(),
894 }),
895 }
896 }
897
898 fn on_suspend(&self) -> Result<()> {
899 // On KVM implementations that use a paravirtualized clock (e.g. x86), a flag must be set to
900 // indicate to the guest kernel that a vCPU was suspended. The guest kernel will use this
901 // flag to prevent the soft lockup detection from triggering when this vCPU resumes, which
902 // could happen days later in realtime.
903 if self.cap_kvmclock_ctrl {
904 // SAFETY:
905 // The ioctl is safe because it does not read or write memory in this process.
906 if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL()) } != 0 {
907 return errno_result();
908 }
909 }
910
911 Ok(())
912 }
913
914 unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
915 let kvm_cap = kvm_enable_cap {
916 cap,
917 args: *args,
918 ..Default::default()
919 };
920 // SAFETY:
921 // Safe because we allocated the struct and we know the kernel will read exactly the size of
922 // the struct, and because we assume the caller has allocated the args appropriately.
923 let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), &kvm_cap);
924 if ret == 0 {
925 Ok(())
926 } else {
927 errno_result()
928 }
929 }
930
931 #[allow(clippy::cast_ptr_alignment)]
932 // The pointer is page aligned so casting to a different type is well defined, hence the clippy
933 // allow attribute.
934 fn run(&mut self) -> Result<VcpuExit> {
935 // SAFETY:
936 // Safe because we know that our file is a VCPU fd and we verify the return result.
937 let ret = unsafe { ioctl(self, KVM_RUN()) };
938 if ret != 0 {
939 return errno_result();
940 }
941
942 // SAFETY:
943 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
944 // kernel told us how large it was.
945 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
946 match run.exit_reason {
947 KVM_EXIT_IO => Ok(VcpuExit::Io),
948 KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
949 KVM_EXIT_IOAPIC_EOI => {
950 // SAFETY:
951 // Safe because the exit_reason (which comes from the kernel) told us which
952 // union field to use.
953 let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
954 Ok(VcpuExit::IoapicEoi { vector })
955 }
956 KVM_EXIT_HYPERV => Ok(VcpuExit::HypervHypercall),
957 KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
958 KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
959 KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
960 KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
961 KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
962 KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
963 KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
964 KVM_EXIT_FAIL_ENTRY => {
965 // SAFETY:
966 // Safe because the exit_reason (which comes from the kernel) told us which
967 // union field to use.
968 let hardware_entry_failure_reason = unsafe {
969 run.__bindgen_anon_1
970 .fail_entry
971 .hardware_entry_failure_reason
972 };
973 Ok(VcpuExit::FailEntry {
974 hardware_entry_failure_reason,
975 })
976 }
977 KVM_EXIT_INTR => Ok(VcpuExit::Intr),
978 KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
979 KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
980 KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
981 KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
982 KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
983 KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
984 KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
985 KVM_EXIT_OSI => Ok(VcpuExit::Osi),
986 KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
987 KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
988 KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
989 KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
990 KVM_EXIT_EPR => Ok(VcpuExit::Epr),
991 KVM_EXIT_SYSTEM_EVENT => {
992 // SAFETY:
993 // Safe because we know the exit reason told us this union
994 // field is valid
995 let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
996 let event_flags =
997 // SAFETY:
998 // Safe because we know the exit reason told us this union
999 // field is valid
1000 unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
1001 match event_type {
1002 KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
1003 KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
1004 KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
1005 _ => {
1006 error!(
1007 "Unknown KVM system event {} with flags {}",
1008 event_type, event_flags
1009 );
1010 Err(Error::new(EINVAL))
1011 }
1012 }
1013 }
1014 KVM_EXIT_X86_RDMSR => {
1015 // SAFETY:
1016 // Safe because the exit_reason (which comes from the kernel) told us which
1017 // union field to use.
1018 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1019 let index = msr.index;
1020 // By default, fail the MSR read; handle_rdmsr() clears the error if the read is handled later.
1021 msr.error = 1;
1022 Ok(VcpuExit::RdMsr { index })
1023 }
1024 KVM_EXIT_X86_WRMSR => {
1025 // SAFETY:
1026 // Safe because the exit_reason (which comes from the kernel) told us which
1027 // union field to use.
1028 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1029 // By default fail the MSR write.
1030 msr.error = 1;
1031 let index = msr.index;
1032 let data = msr.data;
1033 Ok(VcpuExit::WrMsr { index, data })
1034 }
1035 KVM_EXIT_X86_BUS_LOCK => Ok(VcpuExit::BusLock),
1036 #[cfg(target_arch = "riscv64")]
1037 KVM_EXIT_RISCV_SBI => {
1038 // SAFETY: safe because we trust the kernel to correctly fill in the union.
1039 let extension_id = unsafe { run.__bindgen_anon_1.riscv_sbi.extension_id };
1040 let function_id = unsafe { run.__bindgen_anon_1.riscv_sbi.function_id };
1041 let args = unsafe { run.__bindgen_anon_1.riscv_sbi.args };
1042 Ok(VcpuExit::Sbi {
1043 extension_id,
1044 function_id,
1045 args,
1046 })
1047 }
1048 #[cfg(target_arch = "riscv64")]
1049 KVM_EXIT_RISCV_CSR => {
1050 // SAFETY: safe because we trust the kernel to correctly fill in the union.
1051 let csr_num = unsafe { run.__bindgen_anon_1.riscv_csr.csr_num };
1052 let new_value = unsafe { run.__bindgen_anon_1.riscv_csr.new_value };
1053 let write_mask = unsafe { run.__bindgen_anon_1.riscv_csr.write_mask };
1054 let ret_value = unsafe { run.__bindgen_anon_1.riscv_csr.ret_value };
1055 Ok(VcpuExit::RiscvCsr {
1056 csr_num,
1057 new_value,
1058 write_mask,
1059 ret_value,
1060 })
1061 }
1062 r => panic!("unknown kvm exit reason: {}", r),
1063 }
1064 }
1065
1066 fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
1067 // SAFETY:
1068 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1069 // kernel told us how large it was. The pointer is page aligned so casting to a different
1070 // type is well defined, hence the clippy allow attribute.
1071 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1072 // Verify that the handler is called in the right context.
1073 assert!(run.exit_reason == KVM_EXIT_MMIO);
1074 // SAFETY:
1075 // Safe because the exit_reason (which comes from the kernel) told us which
1076 // union field to use.
1077 let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
1078 let address = mmio.phys_addr;
1079 let size = min(mmio.len as usize, mmio.data.len());
1080 if mmio.is_write != 0 {
1081 handle_fn(IoParams {
1082 address,
1083 size,
1084 operation: IoOperation::Write { data: mmio.data },
1085 });
1086 Ok(())
1087 } else if let Some(data) = handle_fn(IoParams {
1088 address,
1089 size,
1090 operation: IoOperation::Read,
1091 }) {
1092 mmio.data[..size].copy_from_slice(&data[..size]);
1093 Ok(())
1094 } else {
1095 Err(Error::new(EINVAL))
1096 }
1097 }
1098
1099 fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
1100 // SAFETY:
1101 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1102 // kernel told us how large it was. The pointer is page aligned so casting to a different
1103 // type is well defined, hence the clippy allow attribute.
1104 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1105 // Verify that the handler is called in the right context.
1106 assert!(run.exit_reason == KVM_EXIT_IO);
1107 // SAFETY:
1108 // Safe because the exit_reason (which comes from the kernel) told us which
1109 // union field to use.
1110 let io = unsafe { run.__bindgen_anon_1.io };
1111 let size = usize::from(io.size);
1112
1113 // SAFETY:
1114 // The data_offset is defined by the kernel to be some number of bytes into the kvm_run
1115 // structure, which we have fully mmap'd.
1116 let mut data_ptr = unsafe { (run as *mut kvm_run as *mut u8).add(io.data_offset as usize) };
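// For string I/O instructions the kernel batches `io.count` accesses of `io.size` bytes each,
// packed contiguously starting at `data_offset` within the kvm_run mapping.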
1117
1118 match io.direction as u32 {
1119 KVM_EXIT_IO_IN => {
1120 for _ in 0..io.count {
1121 if let Some(data) = handle_fn(IoParams {
1122 address: io.port.into(),
1123 size,
1124 operation: IoOperation::Read,
1125 }) {
1126 // TODO(b/315998194): Add safety comment
1127 #[allow(clippy::undocumented_unsafe_blocks)]
1128 unsafe {
1129 copy_nonoverlapping(data.as_ptr(), data_ptr, size);
1130 data_ptr = data_ptr.add(size);
1131 }
1132 } else {
1133 return Err(Error::new(EINVAL));
1134 }
1135 }
1136 Ok(())
1137 }
1138 KVM_EXIT_IO_OUT => {
1139 for _ in 0..io.count {
1140 let mut data = [0; 8];
1141 // TODO(b/315998194): Add safety comment
1142 #[allow(clippy::undocumented_unsafe_blocks)]
1143 unsafe {
1144 copy_nonoverlapping(data_ptr, data.as_mut_ptr(), min(size, data.len()));
1145 data_ptr = data_ptr.add(size);
1146 }
1147 handle_fn(IoParams {
1148 address: io.port.into(),
1149 size,
1150 operation: IoOperation::Write { data },
1151 });
1152 }
1153 Ok(())
1154 }
1155 _ => Err(Error::new(EINVAL)),
1156 }
1157 }
1158
1159 fn handle_hyperv_hypercall(
1160 &self,
1161 handle_fn: &mut dyn FnMut(HypervHypercall) -> u64,
1162 ) -> Result<()> {
1163 // SAFETY:
1164 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1165 // kernel told us how large it was.
1166 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1167 // Verify that the handler is called in the right context.
1168 assert!(run.exit_reason == KVM_EXIT_HYPERV);
1169 // SAFETY:
1170 // Safe because the exit_reason (which comes from the kernel) told us which
1171 // union field to use.
1172 let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
1173 match hyperv.type_ {
1174 KVM_EXIT_HYPERV_SYNIC => {
1175 // TODO(b/315998194): Add safety comment
1176 #[allow(clippy::undocumented_unsafe_blocks)]
1177 let synic = unsafe { &hyperv.u.synic };
1178 handle_fn(HypervHypercall::HypervSynic {
1179 msr: synic.msr,
1180 control: synic.control,
1181 evt_page: synic.evt_page,
1182 msg_page: synic.msg_page,
1183 });
1184 Ok(())
1185 }
1186 KVM_EXIT_HYPERV_HCALL => {
1187 // TODO(b/315998194): Add safety comment
1188 #[allow(clippy::undocumented_unsafe_blocks)]
1189 let hcall = unsafe { &mut hyperv.u.hcall };
1190 hcall.result = handle_fn(HypervHypercall::HypervHcall {
1191 input: hcall.input,
1192 params: hcall.params,
1193 });
1194 Ok(())
1195 }
1196 _ => Err(Error::new(EINVAL)),
1197 }
1198 }
1199
1200 fn handle_rdmsr(&self, data: u64) -> Result<()> {
1201 // SAFETY:
1202 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1203 // kernel told us how large it was.
1204 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1205 // Verify that the handler is called in the right context.
1206 assert!(run.exit_reason == KVM_EXIT_X86_RDMSR);
1207 // SAFETY:
1208 // Safe because the exit_reason (which comes from the kernel) told us which
1209 // union field to use.
1210 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1211 msr.data = data;
1212 msr.error = 0;
1213 Ok(())
1214 }
1215
1216 fn handle_wrmsr(&self) {
1217 // SAFETY:
1218 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1219 // kernel told us how large it was.
1220 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1221 // Verify that the handler is called in the right context.
1222 assert!(run.exit_reason == KVM_EXIT_X86_WRMSR);
1223 // SAFETY:
1224 // Safe because the exit_reason (which comes from the kernel) told us which
1225 // union field to use.
1226 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1227 msr.error = 0;
1228 }
1229 }
1230
1231 impl KvmVcpu {
1232 /// Gets the vcpu's current "multiprocessing state".
1233 ///
1234 /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
1235 /// a call to `Vm::create_irq_chip`.
1236 ///
1237 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1238 /// to run crosvm on s390.
1239 pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
1240 // SAFETY: trivially safe
1241 let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
1242 let ret = {
1243 // SAFETY:
1244 // Safe because we know that our file is a VCPU fd, we know the kernel will only write
1245 // the correct amount of memory to our pointer, and we verify the return
1246 // result.
1247 unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut state) }
1248 };
1249 if ret < 0 {
1250 return errno_result();
1251 }
1252 Ok(state)
1253 }
1254
1255 /// Sets the vcpu's current "multiprocessing state".
1256 ///
1257 /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
1258 /// a call to `Vm::create_irq_chip`.
1259 ///
1260 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1261 /// to run crosvm on s390.
1262 pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
1263 let ret = {
1264 // SAFETY:
1265 // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
1266 unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), state) }
1267 };
1268 if ret < 0 {
1269 return errno_result();
1270 }
1271 Ok(())
1272 }
1273 }
1274
1275 impl AsRawDescriptor for KvmVcpu {
1276 fn as_raw_descriptor(&self) -> RawDescriptor {
1277 self.vcpu.as_raw_descriptor()
1278 }
1279 }
1280
1281 impl TryFrom<HypervisorCap> for KvmCap {
1282 type Error = Error;
1283
1284 fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
1285 match cap {
1286 HypervisorCap::ArmPmuV3 => Ok(KvmCap::ArmPmuV3),
1287 HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
1288 HypervisorCap::S390UserSigp => Ok(KvmCap::S390UserSigp),
1289 HypervisorCap::TscDeadlineTimer => Ok(KvmCap::TscDeadlineTimer),
1290 HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
1291 #[cfg(target_arch = "x86_64")]
1292 HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
1293 #[cfg(target_arch = "x86_64")]
1294 HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
1295 HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
1296 HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
1297 }
1298 }
1299 }
1300
1301 impl From<&IrqRoute> for kvm_irq_routing_entry {
1302 fn from(item: &IrqRoute) -> Self {
1303 match &item.source {
1304 IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
1305 gsi: item.gsi,
1306 type_: KVM_IRQ_ROUTING_IRQCHIP,
1307 u: kvm_irq_routing_entry__bindgen_ty_1 {
1308 irqchip: kvm_irq_routing_irqchip {
1309 irqchip: chip_to_kvm_chip(*chip),
1310 pin: *pin,
1311 },
1312 },
1313 ..Default::default()
1314 },
1315 IrqSource::Msi { address, data } => kvm_irq_routing_entry {
1316 gsi: item.gsi,
1317 type_: KVM_IRQ_ROUTING_MSI,
1318 u: kvm_irq_routing_entry__bindgen_ty_1 {
1319 msi: kvm_irq_routing_msi {
1320 address_lo: *address as u32,
1321 address_hi: (*address >> 32) as u32,
1322 data: *data,
1323 ..Default::default()
1324 },
1325 },
1326 ..Default::default()
1327 },
1328 }
1329 }
1330 }
1331
1332 impl From<&kvm_mp_state> for MPState {
1333 fn from(item: &kvm_mp_state) -> Self {
1334 match item.mp_state {
1335 KVM_MP_STATE_RUNNABLE => MPState::Runnable,
1336 KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
1337 KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
1338 KVM_MP_STATE_HALTED => MPState::Halted,
1339 KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
1340 KVM_MP_STATE_STOPPED => MPState::Stopped,
1341 state => {
1342 error!(
1343 "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
1344 state
1345 );
1346 MPState::Runnable
1347 }
1348 }
1349 }
1350 }
1351
1352 impl From<&MPState> for kvm_mp_state {
1353 fn from(item: &MPState) -> Self {
1354 kvm_mp_state {
1355 mp_state: match item {
1356 MPState::Runnable => KVM_MP_STATE_RUNNABLE,
1357 MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
1358 MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
1359 MPState::Halted => KVM_MP_STATE_HALTED,
1360 MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
1361 MPState::Stopped => KVM_MP_STATE_STOPPED,
1362 },
1363 }
1364 }
1365 }
1366