1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
6 mod aarch64;
7 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8 pub use aarch64::*;
9 use base::sys::BlockedSignal;
10
11 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
12 mod x86_64;
13 use std::cell::RefCell;
14 use std::cmp::min;
15 use std::cmp::Reverse;
16 use std::collections::BTreeMap;
17 use std::collections::BinaryHeap;
18 use std::convert::TryFrom;
19 use std::ffi::CString;
20 use std::mem::size_of;
21 use std::mem::ManuallyDrop;
22 use std::os::raw::c_int;
23 use std::os::raw::c_ulong;
24 use std::os::raw::c_void;
25 use std::os::unix::prelude::OsStrExt;
26 use std::path::Path;
27 use std::path::PathBuf;
28 use std::ptr::copy_nonoverlapping;
29 use std::sync::atomic::AtomicU64;
30 use std::sync::Arc;
31
32 use base::errno_result;
33 use base::error;
34 use base::ioctl;
35 use base::ioctl_with_mut_ref;
36 use base::ioctl_with_ref;
37 use base::ioctl_with_val;
38 use base::pagesize;
39 use base::signal;
40 use base::AsRawDescriptor;
41 use base::Error;
42 use base::Event;
43 use base::FromRawDescriptor;
44 use base::MappedRegion;
45 use base::MemoryMapping;
46 use base::MemoryMappingBuilder;
47 use base::MemoryMappingBuilderUnix;
48 use base::MmapError;
49 use base::Protection;
50 use base::RawDescriptor;
51 use base::Result;
52 use base::SafeDescriptor;
53 use data_model::vec_with_array_field;
54 use kvm_sys::*;
55 use libc::open64;
56 use libc::sigset_t;
57 use libc::EBUSY;
58 use libc::EFAULT;
59 use libc::EINVAL;
60 use libc::EIO;
61 use libc::ENOENT;
62 use libc::ENOSPC;
63 use libc::ENOSYS;
64 use libc::EOVERFLOW;
65 use libc::O_CLOEXEC;
66 use libc::O_RDWR;
67 use sync::Mutex;
68 use vm_memory::GuestAddress;
69 use vm_memory::GuestMemory;
70 use vm_memory::MemoryRegionInformation;
71 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
72 pub use x86_64::*;
73
74 use crate::ClockState;
75 use crate::Config;
76 use crate::Datamatch;
77 use crate::DeviceKind;
78 use crate::HypervHypercall;
79 use crate::Hypervisor;
80 use crate::HypervisorCap;
81 use crate::IoEventAddress;
82 use crate::IoOperation;
83 use crate::IoParams;
84 use crate::IrqRoute;
85 use crate::IrqSource;
86 use crate::MPState;
87 use crate::MemSlot;
88 use crate::Vcpu;
89 use crate::VcpuExit;
90 use crate::VcpuRunHandle;
91 use crate::Vm;
92 use crate::VmCap;
93
94 // Wrapper around KVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping
95 // from guest physical to host user pages.
96 //
97 // Safe when the guest regions are guaranteed not to overlap.
98 unsafe fn set_user_memory_region(
99 descriptor: &SafeDescriptor,
100 slot: MemSlot,
101 read_only: bool,
102 log_dirty_pages: bool,
103 guest_addr: u64,
104 memory_size: u64,
105 userspace_addr: *mut u8,
106 ) -> Result<()> {
107 let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
108 if log_dirty_pages {
109 flags |= KVM_MEM_LOG_DIRTY_PAGES;
110 }
111 let region = kvm_userspace_memory_region {
112 slot,
113 flags,
114 guest_phys_addr: guest_addr,
115 memory_size,
116 userspace_addr: userspace_addr as u64,
117 };
118
119 let ret = ioctl_with_ref(descriptor, KVM_SET_USER_MEMORY_REGION(), &region);
120 if ret == 0 {
121 Ok(())
122 } else {
123 errno_result()
124 }
125 }
126
127 /// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
128 /// size.
129 ///
130 /// # Arguments
131 ///
132 /// * `size` - Number of bytes in the memory region being queried.
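///
/// # Example
///
/// A minimal sketch of the arithmetic (ignored in doctests; assumes a 4 KiB host page size):
/// a 1 MiB region spans 256 pages, and 256 bits round up to 32 bytes of bitmap.
///
/// ```ignore
/// // Assumption: pagesize() returns 4096 on this host.
/// assert_eq!(dirty_log_bitmap_size(1024 * 1024), 32);
/// ```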
133 pub fn dirty_log_bitmap_size(size: usize) -> usize {
134 let page_size = pagesize();
135 (((size + page_size - 1) / page_size) + 7) / 8
136 }
137
138 pub struct Kvm {
139 kvm: SafeDescriptor,
140 }
141
142 pub type KvmCap = kvm::Cap;
143
144 impl Kvm {
145 pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
146 // Open calls are safe because we give a nul-terminated string and verify the result.
147 let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
148 let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
149 if ret < 0 {
150 return errno_result();
151 }
152 // Safe because we verify that ret is valid and we own the fd.
153 let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
154
155 // Safe because we know that the descriptor is valid and we verify the return result.
156 let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION()) };
157 if version < 0 {
158 return errno_result();
159 }
160
161 // Per the kernel KVM API documentation: "Applications should refuse to run if
162 // KVM_GET_API_VERSION returns a value other than 12."
163 if version as u32 != KVM_API_VERSION {
164 error!(
165 "KVM_GET_API_VERSION: expected {}, got {}",
166 KVM_API_VERSION, version,
167 );
168 return Err(Error::new(ENOSYS));
169 }
170
171 Ok(Kvm { kvm })
172 }
173
174 /// Opens `/dev/kvm` and returns a Kvm object on success.
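///
/// A minimal usage sketch (ignored in doctests; requires access to `/dev/kvm`):
///
/// ```ignore
/// let kvm = Kvm::new().expect("failed to open /dev/kvm");
/// let run_size = kvm.get_vcpu_mmap_size().expect("failed to query vcpu mmap size");
/// assert!(kvm.check_capability(HypervisorCap::UserMemory));
/// ```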
175 pub fn new() -> Result<Kvm> {
176 Kvm::new_with_path(&PathBuf::from("/dev/kvm"))
177 }
178
179 /// Gets the size of the mmap required to use a vcpu's `kvm_run` structure.
180 pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
181 // Safe because we know that our file is a KVM fd and we verify the return result.
182 let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) };
183 if res > 0 {
184 Ok(res as usize)
185 } else {
186 errno_result()
187 }
188 }
189 }
190
191 impl AsRawDescriptor for Kvm {
192 fn as_raw_descriptor(&self) -> RawDescriptor {
193 self.kvm.as_raw_descriptor()
194 }
195 }
196
197 impl Hypervisor for Kvm {
198 fn try_clone(&self) -> Result<Self> {
199 Ok(Kvm {
200 kvm: self.kvm.try_clone()?,
201 })
202 }
203
204 fn check_capability(&self, cap: HypervisorCap) -> bool {
205 if let Ok(kvm_cap) = KvmCap::try_from(cap) {
206 // this ioctl is safe because we know this kvm descriptor is valid,
207 // and we are copying over the kvm capability (u32) as a c_ulong value.
208 unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), kvm_cap as c_ulong) == 1 }
209 } else {
210 // this capability cannot be converted on this platform, so return false
211 false
212 }
213 }
214 }
215
216 /// A wrapper around creating and using a KVM VM.
217 pub struct KvmVm {
218 kvm: Kvm,
219 vm: SafeDescriptor,
220 guest_mem: GuestMemory,
221 mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
222 /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
223 mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
224 }
225
226 impl KvmVm {
227 /// Constructs a new `KvmVm` using the given `Kvm` instance.
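///
/// A minimal sketch (ignored in doctests; assumes `GuestMemory::new` accepts a slice of
/// `(GuestAddress, size)` ranges and that `Config` implements `Default`):
///
/// ```ignore
/// let kvm = Kvm::new()?;
/// // One 64 MiB region of guest RAM starting at guest physical address 0.
/// let guest_mem = GuestMemory::new(&[(GuestAddress(0), 64 << 20)])?;
/// let vm = KvmVm::new(&kvm, guest_mem, Config::default())?;
/// let vcpu = vm.create_kvm_vcpu(0)?;
/// ```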
228 pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
229 // Safe because we know kvm is a real kvm fd as this module is the only one that can make
230 // Kvm objects.
231 let ret = unsafe {
232 ioctl_with_val(
233 kvm,
234 KVM_CREATE_VM(),
235 kvm.get_vm_type(cfg.protection_type)? as c_ulong,
236 )
237 };
238 if ret < 0 {
239 return errno_result();
240 }
241 // Safe because we verify that ret is valid and we own the fd.
242 let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
243 guest_mem.with_regions(
244 |MemoryRegionInformation {
245 index,
246 guest_addr,
247 size,
248 host_addr,
249 ..
250 }| {
251 unsafe {
252 // Safe because the guest regions are guaranteed not to overlap.
253 set_user_memory_region(
254 &vm_descriptor,
255 index as MemSlot,
256 false,
257 false,
258 guest_addr.offset(),
259 size as u64,
260 host_addr as *mut u8,
261 )
262 }
263 },
264 )?;
265
266 let vm = KvmVm {
267 kvm: kvm.try_clone()?,
268 vm: vm_descriptor,
269 guest_mem,
270 mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
271 mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
272 };
273 vm.init_arch(&cfg)?;
274 Ok(vm)
275 }
276
277 pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
278 let run_mmap_size = self.kvm.get_vcpu_mmap_size()?;
279
280 // Safe because we know that our file is a VM fd and we verify the return result.
281 let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU(), c_ulong::try_from(id).unwrap()) };
282 if fd < 0 {
283 return errno_result();
284 }
285
286 // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
287 // the value of the fd and we own the fd.
288 let vcpu = unsafe { SafeDescriptor::from_raw_descriptor(fd) };
289
290 let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
291 .from_descriptor(&vcpu)
292 .build()
293 .map_err(|_| Error::new(ENOSPC))?;
294
295 Ok(KvmVcpu {
296 kvm: self.kvm.try_clone()?,
297 vm: self.vm.try_clone()?,
298 vcpu,
299 id,
300 run_mmap,
301 vcpu_run_handle_fingerprint: Default::default(),
302 })
303 }
304
305 /// Creates an in-kernel interrupt controller.
306 ///
307 /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
308 pub fn create_irq_chip(&self) -> Result<()> {
309 // Safe because we know that our file is a VM fd and we verify the return result.
310 let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) };
311 if ret == 0 {
312 Ok(())
313 } else {
314 errno_result()
315 }
316 }
317
318 /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
319 pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
320 let mut irq_level = kvm_irq_level::default();
321 irq_level.__bindgen_anon_1.irq = irq;
322 irq_level.level = active.into();
323
324 // Safe because we know that our file is a VM fd, we know the kernel will only read the
325 // correct amount of memory from our pointer, and we verify the return result.
326 let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
327 if ret == 0 {
328 Ok(())
329 } else {
330 errno_result()
331 }
332 }
333
334 /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt`
335 /// (when not `None`) will be triggered when the irqchip is resampled.
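///
/// A minimal sketch (ignored in doctests; assumes `Event::new()` creates an eventfd-backed
/// `Event`):
///
/// ```ignore
/// vm.create_irq_chip()?;
/// let evt = Event::new()?;
/// // Deliver GSI 4 whenever `evt` is signalled; no resample event.
/// vm.register_irqfd(4, &evt, None)?;
/// // Tear the routing down later with the same (gsi, evt) pair.
/// vm.unregister_irqfd(4, &evt)?;
/// ```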
336 pub fn register_irqfd(
337 &self,
338 gsi: u32,
339 evt: &Event,
340 resample_evt: Option<&Event>,
341 ) -> Result<()> {
342 let mut irqfd = kvm_irqfd {
343 fd: evt.as_raw_descriptor() as u32,
344 gsi,
345 ..Default::default()
346 };
347
348 if let Some(r_evt) = resample_evt {
349 irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
350 irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
351 }
352
353 // Safe because we know that our file is a VM fd, we know the kernel will only read the
354 // correct amount of memory from our pointer, and we verify the return result.
355 let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
356 if ret == 0 {
357 Ok(())
358 } else {
359 errno_result()
360 }
361 }
362
363 /// Unregisters an event that was previously registered with
364 /// `register_irqfd`.
365 ///
366 /// The `evt` and `gsi` pair must be the same as the ones passed into
367 /// `register_irqfd`.
368 pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
369 let irqfd = kvm_irqfd {
370 fd: evt.as_raw_descriptor() as u32,
371 gsi,
372 flags: KVM_IRQFD_FLAG_DEASSIGN,
373 ..Default::default()
374 };
375 // Safe because we know that our file is a VM fd, we know the kernel will only read the
376 // correct amount of memory from our pointer, and we verify the return result.
377 let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
378 if ret == 0 {
379 Ok(())
380 } else {
381 errno_result()
382 }
383 }
384
385 /// Sets the GSI routing table, replacing any table set with previous calls to
386 /// `set_gsi_routing`.
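///
/// A minimal sketch (ignored in doctests); the MSI address and data values here are
/// illustrative only:
///
/// ```ignore
/// vm.set_gsi_routing(&[IrqRoute {
///     gsi: 5,
///     source: IrqSource::Msi {
///         address: 0xfee0_0000,
///         data: 0x20,
///     },
/// }])?;
/// ```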
387 pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
388 let mut irq_routing =
389 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
390 irq_routing[0].nr = routes.len() as u32;
391
392 // Safe because we ensured there is enough space in irq_routing to hold the number of
393 // route entries.
394 let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
395 for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
396 *irq_route = kvm_irq_routing_entry::from(route);
397 }
398
399 let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), &irq_routing[0]) };
400 if ret == 0 {
401 Ok(())
402 } else {
403 errno_result()
404 }
405 }
406
407 fn ioeventfd(
408 &self,
409 evt: &Event,
410 addr: IoEventAddress,
411 datamatch: Datamatch,
412 deassign: bool,
413 ) -> Result<()> {
414 let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
415 Datamatch::AnyLength => (false, 0, 0),
416 Datamatch::U8(v) => match v {
417 Some(u) => (true, u as u64, 1),
418 None => (false, 0, 1),
419 },
420 Datamatch::U16(v) => match v {
421 Some(u) => (true, u as u64, 2),
422 None => (false, 0, 2),
423 },
424 Datamatch::U32(v) => match v {
425 Some(u) => (true, u as u64, 4),
426 None => (false, 0, 4),
427 },
428 Datamatch::U64(v) => match v {
429 Some(u) => (true, u as u64, 8),
430 None => (false, 0, 8),
431 },
432 };
433 let mut flags = 0;
434 if deassign {
435 flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
436 }
437 if do_datamatch {
438 flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
439 }
440 if let IoEventAddress::Pio(_) = addr {
441 flags |= 1 << kvm_ioeventfd_flag_nr_pio;
442 }
443 let ioeventfd = kvm_ioeventfd {
444 datamatch: datamatch_value,
445 len: datamatch_len,
446 addr: match addr {
447 IoEventAddress::Pio(p) => p as u64,
448 IoEventAddress::Mmio(m) => m,
449 },
450 fd: evt.as_raw_descriptor(),
451 flags,
452 ..Default::default()
453 };
454 // Safe because we know that our file is a VM fd, we know the kernel will only read the
455 // correct amount of memory from our pointer, and we verify the return result.
456 let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
457 if ret == 0 {
458 Ok(())
459 } else {
460 errno_result()
461 }
462 }
463
464 /// Checks whether a particular KVM-specific capability is available for this VM.
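///
/// A minimal sketch (ignored in doctests):
///
/// ```ignore
/// // KvmclockCtrl backs VmCap::PvClockSuspend in check_capability() below.
/// let supports_kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
/// ```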
465 pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
466 // Safe because we know that our file is a VM fd, and if the cap is invalid KVM assumes
467 // it's an unavailable extension and returns 0.
468 let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), capability as c_ulong) };
469 match capability {
470 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
471 KvmCap::BusLockDetect => {
472 if ret > 0 {
473 ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
474 } else {
475 false
476 }
477 }
478 _ => ret == 1,
479 }
480 }
481
482 // Currently only used on aarch64, but works on any architecture.
483 #[allow(dead_code)]
484 /// Enables a KVM-specific capability for this VM, with the given arguments.
485 ///
486 /// # Safety
487 /// This function is marked as unsafe because `args` may be interpreted as pointers for some
488 /// capabilities. The caller must ensure that any pointers passed in the `args` array are
489 /// allocated as the kernel expects, and that mutable pointers are owned.
490 unsafe fn enable_raw_capability(
491 &self,
492 capability: KvmCap,
493 flags: u32,
494 args: &[u64; 4],
495 ) -> Result<()> {
496 let kvm_cap = kvm_enable_cap {
497 cap: capability as u32,
498 args: *args,
499 flags,
500 ..Default::default()
501 };
502 // Safe because we allocated the struct and we know the kernel will read exactly the size of
503 // the struct, and because we assume the caller has allocated the args appropriately.
504 let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), &kvm_cap);
505 if ret == 0 {
506 Ok(())
507 } else {
508 errno_result()
509 }
510 }
511 }
512
513 impl Vm for KvmVm {
514 fn try_clone(&self) -> Result<Self> {
515 Ok(KvmVm {
516 kvm: self.kvm.try_clone()?,
517 vm: self.vm.try_clone()?,
518 guest_mem: self.guest_mem.clone(),
519 mem_regions: self.mem_regions.clone(),
520 mem_slot_gaps: self.mem_slot_gaps.clone(),
521 })
522 }
523
524 fn check_capability(&self, c: VmCap) -> bool {
525 if let Some(val) = self.check_capability_arch(c) {
526 return val;
527 }
528 match c {
529 VmCap::DirtyLog => true,
530 VmCap::PvClock => false,
531 VmCap::PvClockSuspend => self.check_raw_capability(KvmCap::KvmclockCtrl),
532 VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
533 VmCap::EarlyInitCpuid => false,
534 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
535 VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
536 }
537 }
538
539 fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
540 match c {
541 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
542 VmCap::BusLockDetect => {
543 let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
544 Ok(unsafe {
545 self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
546 })
547 }
548 _ => Ok(false),
549 }
550 }
551
552 fn get_guest_phys_addr_bits(&self) -> u8 {
553 self.kvm.get_guest_phys_addr_bits()
554 }
555
556 fn get_memory(&self) -> &GuestMemory {
557 &self.guest_mem
558 }
559
560 fn add_memory_region(
561 &mut self,
562 guest_addr: GuestAddress,
563 mem: Box<dyn MappedRegion>,
564 read_only: bool,
565 log_dirty_pages: bool,
566 ) -> Result<MemSlot> {
567 let pgsz = pagesize() as u64;
568 // KVM requires the user memory region size to be page-size aligned. It is safe to round
569 // mem.size() up to page alignment because mmap rounds the mapping size up to page
570 // alignment if it is not already.
571 let size = (mem.size() as u64 + pgsz - 1) / pgsz * pgsz;
572 let end_addr = guest_addr
573 .checked_add(size)
574 .ok_or_else(|| Error::new(EOVERFLOW))?;
575 if self.guest_mem.range_overlap(guest_addr, end_addr) {
576 return Err(Error::new(ENOSPC));
577 }
578 let mut regions = self.mem_regions.lock();
579 let mut gaps = self.mem_slot_gaps.lock();
580 let slot = match gaps.pop() {
581 Some(gap) => gap.0,
582 None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
583 };
584
585 // Safe because we check that the given guest address is valid and has no overlaps. We also
586 // know that the pointer and size are correct because the MemoryMapping interface ensures
587 // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
588 // is removed.
589 let res = unsafe {
590 set_user_memory_region(
591 &self.vm,
592 slot,
593 read_only,
594 log_dirty_pages,
595 guest_addr.offset() as u64,
596 size,
597 mem.as_ptr(),
598 )
599 };
600
601 if let Err(e) = res {
602 gaps.push(Reverse(slot));
603 return Err(e);
604 }
605 regions.insert(slot, mem);
606 Ok(slot)
607 }
608
609 fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
610 let mut regions = self.mem_regions.lock();
611 let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;
612
613 mem.msync(offset, size).map_err(|err| match err {
614 MmapError::InvalidAddress => Error::new(EFAULT),
615 MmapError::NotPageAligned => Error::new(EINVAL),
616 MmapError::SystemCallFailed(e) => e,
617 _ => Error::new(EIO),
618 })
619 }
620
621 fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
622 let mut regions = self.mem_regions.lock();
623 if !regions.contains_key(&slot) {
624 return Err(Error::new(ENOENT));
625 }
626 // Safe because the slot is checked against the list of memory slots.
627 unsafe {
628 set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?;
629 }
630 self.mem_slot_gaps.lock().push(Reverse(slot));
631 // This remove will always succeed because of the contains_key check above.
632 Ok(regions.remove(&slot).unwrap())
633 }
634
635 fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
636 let device = if let Some(dev) = self.get_device_params_arch(kind) {
637 dev
638 } else {
639 match kind {
640 DeviceKind::Vfio => kvm_create_device {
641 type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
642 fd: 0,
643 flags: 0,
644 },
645
646 // ARM has additional DeviceKinds, so it needs the catch-all pattern
647 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
648 _ => return Err(Error::new(libc::ENXIO)),
649 }
650 };
651
652 // Safe because we know that our file is a VM fd, we know the kernel will only write the
653 // correct amount of memory to our pointer, and we verify the return result.
654 let ret = unsafe { base::ioctl_with_ref(self, KVM_CREATE_DEVICE(), &device) };
655 if ret == 0 {
656 // Safe because we verify that ret is valid and we own the fd.
657 Ok(unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) })
658 } else {
659 errno_result()
660 }
661 }
662
663 fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
664 let regions = self.mem_regions.lock();
665 let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
666 // Ensure that dirty_log is large enough to hold a bit for every page in the mmap.
667 if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
668 return Err(Error::new(EINVAL));
669 }
670
671 let mut dirty_log_kvm = kvm_dirty_log {
672 slot,
673 ..Default::default()
674 };
675 dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
676 // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid (because
677 // it's from a slice) and we checked that it will be large enough to hold the entire log.
678 let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirty_log_kvm) };
679 if ret == 0 {
680 Ok(())
681 } else {
682 errno_result()
683 }
684 }
685
686 fn register_ioevent(
687 &mut self,
688 evt: &Event,
689 addr: IoEventAddress,
690 datamatch: Datamatch,
691 ) -> Result<()> {
692 self.ioeventfd(evt, addr, datamatch, false)
693 }
694
695 fn unregister_ioevent(
696 &mut self,
697 evt: &Event,
698 addr: IoEventAddress,
699 datamatch: Datamatch,
700 ) -> Result<()> {
701 self.ioeventfd(evt, addr, datamatch, true)
702 }
703
704 fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
705 // KVM delivers IO events in-kernel with ioeventfds, so this is a no-op
706 Ok(())
707 }
708
709 fn get_pvclock(&self) -> Result<ClockState> {
710 self.get_pvclock_arch()
711 }
712
713 fn set_pvclock(&self, state: &ClockState) -> Result<()> {
714 self.set_pvclock_arch(state)
715 }
716
717 fn add_fd_mapping(
718 &mut self,
719 slot: u32,
720 offset: usize,
721 size: usize,
722 fd: &dyn AsRawDescriptor,
723 fd_offset: u64,
724 prot: Protection,
725 ) -> Result<()> {
726 let mut regions = self.mem_regions.lock();
727 let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;
728
729 match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
730 Ok(()) => Ok(()),
731 Err(MmapError::SystemCallFailed(e)) => Err(e),
732 Err(_) => Err(Error::new(EIO)),
733 }
734 }
735
736 fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
737 let mut regions = self.mem_regions.lock();
738 let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;
739
740 match region.remove_mapping(offset, size) {
741 Ok(()) => Ok(()),
742 Err(MmapError::SystemCallFailed(e)) => Err(e),
743 Err(_) => Err(Error::new(EIO)),
744 }
745 }
746
747 fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
748 match self.guest_mem.remove_range(guest_address, size) {
749 Ok(_) => Ok(()),
750 Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
751 Err(_) => Err(Error::new(EIO)),
752 }
753 }
754
755 fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
756 // No-op; when the guest attempts to access the pages again, Linux/KVM will provide them.
757 Ok(())
758 }
759 }
760
761 impl AsRawDescriptor for KvmVm {
762 fn as_raw_descriptor(&self) -> RawDescriptor {
763 self.vm.as_raw_descriptor()
764 }
765 }
766
767 /// A wrapper around using a KVM Vcpu.
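///
/// A minimal run-loop sketch (ignored in doctests; assumes `vcpu` is a mutable `KvmVcpu`
/// and that I/O and MMIO exits are completed with dummy data):
///
/// ```ignore
/// let handle = vcpu.take_run_handle(None)?;
/// loop {
///     match vcpu.run(&handle)? {
///         VcpuExit::Hlt | VcpuExit::Shutdown => break,
///         // Complete reads with zeroes and drop writes.
///         VcpuExit::Mmio => vcpu.handle_mmio(&mut |_params| Some([0u8; 8]))?,
///         VcpuExit::Io => vcpu.handle_io(&mut |_params| Some([0u8; 8]))?,
///         _ => {}
///     }
/// }
/// ```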
768 pub struct KvmVcpu {
769 kvm: Kvm,
770 vm: SafeDescriptor,
771 vcpu: SafeDescriptor,
772 id: usize,
773 run_mmap: MemoryMapping,
774 vcpu_run_handle_fingerprint: Arc<AtomicU64>,
775 }
776
777 pub(super) struct VcpuThread {
778 run: *mut kvm_run,
779 signal_num: Option<c_int>,
780 }
781
782 thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = RefCell::new(None));
783
784 impl Vcpu for KvmVcpu {
785 fn try_clone(&self) -> Result<Self> {
786 let vm = self.vm.try_clone()?;
787 let vcpu = self.vcpu.try_clone()?;
788 let run_mmap = MemoryMappingBuilder::new(self.run_mmap.size())
789 .from_descriptor(&vcpu)
790 .build()
791 .map_err(|_| Error::new(ENOSPC))?;
792 let vcpu_run_handle_fingerprint = self.vcpu_run_handle_fingerprint.clone();
793
794 Ok(KvmVcpu {
795 kvm: self.kvm.try_clone()?,
796 vm,
797 vcpu,
798 id: self.id,
799 run_mmap,
800 vcpu_run_handle_fingerprint,
801 })
802 }
803
804 fn as_vcpu(&self) -> &dyn Vcpu {
805 self
806 }
807
808 #[allow(clippy::cast_ptr_alignment)]
809 fn take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle> {
810 fn vcpu_run_handle_drop() {
811 VCPU_THREAD.with(|v| {
812 // This assumes that a failure in `BlockedSignal::new` means the signal is already
813 // blocked and therefore it should not be unblocked on exit.
814 let _blocked_signal = &(*v.borrow())
815 .as_ref()
816 .and_then(|state| state.signal_num)
817 .map(BlockedSignal::new);
818
819 *v.borrow_mut() = None;
820 });
821 }
822
823 // Prevent `vcpu_run_handle_drop` from being called until we actually set up the signal
824 // blocking. The handle needs to be made now so that we can use the fingerprint.
825 let vcpu_run_handle = ManuallyDrop::new(VcpuRunHandle::new(vcpu_run_handle_drop));
826
827 // AcqRel ordering is sufficient to ensure only one thread gets to set its fingerprint to
828 // this Vcpu and subsequent `run` calls will see the fingerprint.
829 if self
830 .vcpu_run_handle_fingerprint
831 .compare_exchange(
832 0,
833 vcpu_run_handle.fingerprint().as_u64(),
834 std::sync::atomic::Ordering::AcqRel,
835 std::sync::atomic::Ordering::Acquire,
836 )
837 .is_err()
838 {
839 return Err(Error::new(EBUSY));
840 }
841
842 // Block signal while we add -- if a signal fires (very unlikely,
843 // as this means something is trying to pause the vcpu before it has
844 // even started) it'll try to grab the read lock while this write
845 // lock is grabbed and cause a deadlock.
846 // Assuming that a failure to block means it's already blocked.
847 let _blocked_signal = signal_num.map(BlockedSignal::new);
848
849 VCPU_THREAD.with(|v| {
850 if v.borrow().is_none() {
851 *v.borrow_mut() = Some(VcpuThread {
852 run: self.run_mmap.as_ptr() as *mut kvm_run,
853 signal_num,
854 });
855 Ok(())
856 } else {
857 Err(Error::new(EBUSY))
858 }
859 })?;
860
861 Ok(ManuallyDrop::into_inner(vcpu_run_handle))
862 }
863
864 fn id(&self) -> usize {
865 self.id
866 }
867
868 #[allow(clippy::cast_ptr_alignment)]
869 fn set_immediate_exit(&self, exit: bool) {
870 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
871 // kernel told us how large it was. The pointer is page aligned so casting to a different
872 // type is well defined, hence the clippy allow attribute.
873 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
874 run.immediate_exit = exit.into();
875 }
876
877 fn set_local_immediate_exit(exit: bool) {
878 VCPU_THREAD.with(|v| {
879 if let Some(state) = &(*v.borrow()) {
880 unsafe {
881 (*state.run).immediate_exit = exit.into();
882 };
883 }
884 });
885 }
886
887 fn set_local_immediate_exit_fn(&self) -> extern "C" fn() {
888 extern "C" fn f() {
889 KvmVcpu::set_local_immediate_exit(true);
890 }
891 f
892 }
893
894 fn pvclock_ctrl(&self) -> Result<()> {
895 self.pvclock_ctrl_arch()
896 }
897
898 fn set_signal_mask(&self, signals: &[c_int]) -> Result<()> {
899 let sigset = signal::create_sigset(signals)?;
900
901 let mut kvm_sigmask = vec_with_array_field::<kvm_signal_mask, sigset_t>(1);
902 // The Rust definition of sigset_t is 128 bytes, but the kernel only
903 // expects an 8-byte structure, so we can't write
904 // kvm_sigmask.len = size_of::<sigset_t>() as u32;
905 kvm_sigmask[0].len = 8;
906 // Ensure the length is not too big.
907 const _ASSERT: usize = size_of::<sigset_t>() - 8usize;
908
909 // Safe as we allocated exactly the needed space
910 unsafe {
911 copy_nonoverlapping(
912 &sigset as *const sigset_t as *const u8,
913 kvm_sigmask[0].sigset.as_mut_ptr(),
914 8,
915 );
916 }
917
918 let ret = unsafe {
919 // The ioctl is safe because the kernel will only read from the
920 // kvm_signal_mask structure.
921 ioctl_with_ref(self, KVM_SET_SIGNAL_MASK(), &kvm_sigmask[0])
922 };
923 if ret == 0 {
924 Ok(())
925 } else {
926 errno_result()
927 }
928 }
929
930 unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
931 let kvm_cap = kvm_enable_cap {
932 cap,
933 args: *args,
934 ..Default::default()
935 };
936 // Safe because we allocated the struct and we know the kernel will read exactly the size of
937 // the struct, and because we assume the caller has allocated the args appropriately.
938 let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), &kvm_cap);
939 if ret == 0 {
940 Ok(())
941 } else {
942 errno_result()
943 }
944 }
945
946 #[allow(clippy::cast_ptr_alignment)]
947 // The pointer is page aligned so casting to a different type is well defined, hence the clippy
948 // allow attribute.
949 fn run(&mut self, run_handle: &VcpuRunHandle) -> Result<VcpuExit> {
950 // Acquire is used to ensure this check is ordered after the `compare_exchange` in `run`.
951 if self
952 .vcpu_run_handle_fingerprint
953 .load(std::sync::atomic::Ordering::Acquire)
954 != run_handle.fingerprint().as_u64()
955 {
956 panic!("invalid VcpuRunHandle used to run Vcpu");
957 }
958
959 // Safe because we know that our file is a VCPU fd and we verify the return result.
960 let ret = unsafe { ioctl(self, KVM_RUN()) };
961 if ret != 0 {
962 return errno_result();
963 }
964
965 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
966 // kernel told us how large it was.
967 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
968 match run.exit_reason {
969 KVM_EXIT_IO => Ok(VcpuExit::Io),
970 KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
971 KVM_EXIT_IOAPIC_EOI => {
972 // Safe because the exit_reason (which comes from the kernel) told us which
973 // union field to use.
974 let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
975 Ok(VcpuExit::IoapicEoi { vector })
976 }
977 KVM_EXIT_HYPERV => Ok(VcpuExit::HypervHypercall),
978 KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
979 KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
980 KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
981 KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
982 KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
983 KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
984 KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
985 KVM_EXIT_FAIL_ENTRY => {
986 // Safe because the exit_reason (which comes from the kernel) told us which
987 // union field to use.
988 let hardware_entry_failure_reason = unsafe {
989 run.__bindgen_anon_1
990 .fail_entry
991 .hardware_entry_failure_reason
992 };
993 Ok(VcpuExit::FailEntry {
994 hardware_entry_failure_reason,
995 })
996 }
997 KVM_EXIT_INTR => Ok(VcpuExit::Intr),
998 KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
999 KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
1000 KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
1001 KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
1002 KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
1003 KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
1004 KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
1005 KVM_EXIT_OSI => Ok(VcpuExit::Osi),
1006 KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
1007 KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
1008 KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
1009 KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
1010 KVM_EXIT_EPR => Ok(VcpuExit::Epr),
1011 KVM_EXIT_SYSTEM_EVENT => {
1012 // Safe because we know the exit reason told us this union
1013 // field is valid
1014 let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
1015 let event_flags =
1016 unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
1017 match event_type {
1018 KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
1019 KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
1020 KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
1021 KVM_SYSTEM_EVENT_S2IDLE => Ok(VcpuExit::SystemEventS2Idle),
1022 _ => {
1023 error!(
1024 "Unknown KVM system event {} with flags {}",
1025 event_type, event_flags
1026 );
1027 Err(Error::new(EINVAL))
1028 }
1029 }
1030 }
1031 KVM_EXIT_X86_RDMSR => {
1032 // Safe because the exit_reason (which comes from the kernel) told us which
1033 // union field to use.
1034 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1035 let index = msr.index;
1036 // By default, fail the MSR read; handle_rdmsr() clears the error if the read is handled.
1037 msr.error = 1;
1038 Ok(VcpuExit::RdMsr { index })
1039 }
1040 KVM_EXIT_X86_WRMSR => {
1041 // Safe because the exit_reason (which comes from the kernel) told us which
1042 // union field to use.
1043 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1044 // By default, fail the MSR write; handle_wrmsr() clears the error if the write is handled.
1045 msr.error = 1;
1046 let index = msr.index;
1047 let data = msr.data;
1048 Ok(VcpuExit::WrMsr { index, data })
1049 }
1050 KVM_EXIT_X86_BUS_LOCK => Ok(VcpuExit::BusLock),
1051 r => panic!("unknown kvm exit reason: {}", r),
1052 }
1053 }
1054
1055 fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
1056 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1057 // kernel told us how large it was. The pointer is page aligned so casting to a different
1058 // type is well defined, hence the clippy allow attribute.
1059 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1060 // Verify that the handler is called in the right context.
1061 assert!(run.exit_reason == KVM_EXIT_MMIO);
1062 // Safe because the exit_reason (which comes from the kernel) told us which
1063 // union field to use.
1064 let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
1065 let address = mmio.phys_addr;
1066 let size = min(mmio.len as usize, mmio.data.len());
1067 if mmio.is_write != 0 {
1068 handle_fn(IoParams {
1069 address,
1070 size,
1071 operation: IoOperation::Write { data: mmio.data },
1072 });
1073 Ok(())
1074 } else if let Some(data) = handle_fn(IoParams {
1075 address,
1076 size,
1077 operation: IoOperation::Read,
1078 }) {
1079 mmio.data[..size].copy_from_slice(&data[..size]);
1080 Ok(())
1081 } else {
1082 Err(Error::new(EINVAL))
1083 }
1084 }
1085
1086 fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
1087 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1088 // kernel told us how large it was. The pointer is page aligned so casting to a different
1089 // type is well defined, hence the clippy allow attribute.
1090 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1091 // Verify that the handler is called in the right context.
1092 assert!(run.exit_reason == KVM_EXIT_IO);
1093 let run_start = run as *mut kvm_run as *mut u8;
1094 // Safe because the exit_reason (which comes from the kernel) told us which
1095 // union field to use.
1096 let io = unsafe { run.__bindgen_anon_1.io };
1097 let size = (io.count as usize) * (io.size as usize);
1098 match io.direction as u32 {
1099 KVM_EXIT_IO_IN => {
1100 if let Some(data) = handle_fn(IoParams {
1101 address: io.port.into(),
1102 size,
1103 operation: IoOperation::Read,
1104 }) {
1105 // The data_offset is defined by the kernel to be some number of bytes
1106 // into the kvm_run structure, which we have fully mmap'd.
1107 unsafe {
1108 let data_ptr = run_start.offset(io.data_offset as isize);
1109 copy_nonoverlapping(data.as_ptr(), data_ptr, size);
1110 }
1111 Ok(())
1112 } else {
1113 Err(Error::new(EINVAL))
1114 }
1115 }
1116 KVM_EXIT_IO_OUT => {
1117 let mut data = [0; 8];
1118 // The data_offset is defined by the kernel to be some number of bytes
1119 // into the kvm_run structure, which we have fully mmap'd.
1120 unsafe {
1121 let data_ptr = run_start.offset(io.data_offset as isize);
1122 copy_nonoverlapping(data_ptr, data.as_mut_ptr(), min(size, data.len()));
1123 }
1124 handle_fn(IoParams {
1125 address: io.port.into(),
1126 size,
1127 operation: IoOperation::Write { data },
1128 });
1129 Ok(())
1130 }
1131 _ => Err(Error::new(EINVAL)),
1132 }
1133 }
1134
1135 fn handle_hyperv_hypercall(
1136 &self,
1137 handle_fn: &mut dyn FnMut(HypervHypercall) -> u64,
1138 ) -> Result<()> {
1139 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1140 // kernel told us how large it was.
1141 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1142 // Verify that the handler is called in the right context.
1143 assert!(run.exit_reason == KVM_EXIT_HYPERV);
1144 // Safe because the exit_reason (which comes from the kernel) told us which
1145 // union field to use.
1146 let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
1147 match hyperv.type_ as u32 {
1148 KVM_EXIT_HYPERV_SYNIC => {
1149 let synic = unsafe { &hyperv.u.synic };
1150 handle_fn(HypervHypercall::HypervSynic {
1151 msr: synic.msr,
1152 control: synic.control,
1153 evt_page: synic.evt_page,
1154 msg_page: synic.msg_page,
1155 });
1156 Ok(())
1157 }
1158 KVM_EXIT_HYPERV_HCALL => {
1159 let hcall = unsafe { &mut hyperv.u.hcall };
1160 hcall.result = handle_fn(HypervHypercall::HypervHcall {
1161 input: hcall.input,
1162 params: hcall.params,
1163 });
1164 Ok(())
1165 }
1166 _ => Err(Error::new(EINVAL)),
1167 }
1168 }
1169
1170 fn handle_rdmsr(&self, data: u64) -> Result<()> {
1171 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1172 // kernel told us how large it was.
1173 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1174 // Verify that the handler is called in the right context.
1175 assert!(run.exit_reason == KVM_EXIT_X86_RDMSR);
1176 // Safe because the exit_reason (which comes from the kernel) told us which
1177 // union field to use.
1178 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1179 msr.data = data;
1180 msr.error = 0;
1181 Ok(())
1182 }
1183
1184 fn handle_wrmsr(&self) {
1185 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1186 // kernel told us how large it was.
1187 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1188 // Verify that the handler is called in the right context.
1189 assert!(run.exit_reason == KVM_EXIT_X86_WRMSR);
1190 // Safe because the exit_reason (which comes from the kernel) told us which
1191 // union field to use.
1192 let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1193 msr.error = 0;
1194 }
1195 }
1196
1197 impl KvmVcpu {
1198 /// Gets the vcpu's current "multiprocessing state".
1199 ///
1200 /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
1201 /// a call to `Vm::create_irq_chip`.
1202 ///
1203 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1204 /// to run crosvm on s390.
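///
/// A minimal sketch (ignored in doctests), using the `From` conversions defined at the
/// bottom of this file:
///
/// ```ignore
/// let raw = vcpu.get_mp_state()?;
/// let state = MPState::from(&raw);
/// vcpu.set_mp_state(&kvm_mp_state::from(&state))?;
/// ```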
1205 pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
1206 // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1207 // correct amount of memory to our pointer, and we verify the return result.
1208 let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
1209 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut state) };
1210 if ret < 0 {
1211 return errno_result();
1212 }
1213 Ok(state)
1214 }
1215
1216 /// Sets the vcpu's current "multiprocessing state".
1217 ///
1218 /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
1219 /// a call to `Vm::create_irq_chip`.
1220 ///
1221 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1222 /// to run crosvm on s390.
1223 pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
1224 let ret = unsafe {
1225 // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
1226 ioctl_with_ref(self, KVM_SET_MP_STATE(), state)
1227 };
1228 if ret < 0 {
1229 return errno_result();
1230 }
1231 Ok(())
1232 }
1233 }
1234
1235 impl AsRawDescriptor for KvmVcpu {
1236 fn as_raw_descriptor(&self) -> RawDescriptor {
1237 self.vcpu.as_raw_descriptor()
1238 }
1239 }
1240
1241 impl TryFrom<HypervisorCap> for KvmCap {
1242 type Error = Error;
1243
1244 fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
1245 match cap {
1246 HypervisorCap::ArmPmuV3 => Ok(KvmCap::ArmPmuV3),
1247 HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
1248 HypervisorCap::S390UserSigp => Ok(KvmCap::S390UserSigp),
1249 HypervisorCap::TscDeadlineTimer => Ok(KvmCap::TscDeadlineTimer),
1250 HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
1251 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1252 HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
1253 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1254 HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
1255 HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
1256 HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
1257 }
1258 }
1259 }
1260
1261 impl From<&IrqRoute> for kvm_irq_routing_entry {
1262 fn from(item: &IrqRoute) -> Self {
1263 match &item.source {
1264 IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
1265 gsi: item.gsi,
1266 type_: KVM_IRQ_ROUTING_IRQCHIP,
1267 u: kvm_irq_routing_entry__bindgen_ty_1 {
1268 irqchip: kvm_irq_routing_irqchip {
1269 irqchip: chip_to_kvm_chip(*chip),
1270 pin: *pin,
1271 },
1272 },
1273 ..Default::default()
1274 },
1275 IrqSource::Msi { address, data } => kvm_irq_routing_entry {
1276 gsi: item.gsi,
1277 type_: KVM_IRQ_ROUTING_MSI,
1278 u: kvm_irq_routing_entry__bindgen_ty_1 {
1279 msi: kvm_irq_routing_msi {
1280 address_lo: *address as u32,
1281 address_hi: (*address >> 32) as u32,
1282 data: *data,
1283 ..Default::default()
1284 },
1285 },
1286 ..Default::default()
1287 },
1288 }
1289 }
1290 }
1291
1292 impl From<&kvm_mp_state> for MPState {
1293 fn from(item: &kvm_mp_state) -> Self {
1294 match item.mp_state {
1295 KVM_MP_STATE_RUNNABLE => MPState::Runnable,
1296 KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
1297 KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
1298 KVM_MP_STATE_HALTED => MPState::Halted,
1299 KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
1300 KVM_MP_STATE_STOPPED => MPState::Stopped,
1301 state => {
1302 error!(
1303 "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
1304 state
1305 );
1306 MPState::Runnable
1307 }
1308 }
1309 }
1310 }
1311
1312 impl From<&MPState> for kvm_mp_state {
1313 fn from(item: &MPState) -> Self {
1314 kvm_mp_state {
1315 mp_state: match item {
1316 MPState::Runnable => KVM_MP_STATE_RUNNABLE,
1317 MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
1318 MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
1319 MPState::Halted => KVM_MP_STATE_HALTED,
1320 MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
1321 MPState::Stopped => KVM_MP_STATE_STOPPED,
1322 },
1323 }
1324 }
1325 }
1326