• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! A safe wrapper around the kernel's KVM interface.
6 //!
7 //! New code should use the `hypervisor` crate instead.
8 
9 #![cfg(unix)]
10 
11 mod cap;
12 
13 use std::cell::RefCell;
14 use std::cmp::min;
15 use std::cmp::Ordering;
16 use std::collections::BTreeMap;
17 use std::collections::BinaryHeap;
18 use std::ffi::CString;
19 use std::fs::File;
20 use std::mem::size_of;
21 use std::ops::Deref;
22 use std::ops::DerefMut;
23 use std::os::raw::*;
24 use std::os::unix::prelude::OsStrExt;
25 use std::path::Path;
26 use std::path::PathBuf;
27 use std::ptr::copy_nonoverlapping;
28 use std::sync::Arc;
29 
30 #[allow(unused_imports)]
31 use base::ioctl;
32 #[allow(unused_imports)]
33 use base::ioctl_with_mut_ptr;
34 #[allow(unused_imports)]
35 use base::ioctl_with_mut_ref;
36 #[allow(unused_imports)]
37 use base::ioctl_with_ptr;
38 #[allow(unused_imports)]
39 use base::ioctl_with_ref;
40 #[allow(unused_imports)]
41 use base::ioctl_with_val;
42 #[allow(unused_imports)]
43 use base::pagesize;
44 #[allow(unused_imports)]
45 use base::signal;
46 use base::sys::BlockedSignal;
47 #[allow(unused_imports)]
48 use base::unblock_signal;
49 #[allow(unused_imports)]
50 use base::warn;
51 use base::AsRawDescriptor;
52 #[allow(unused_imports)]
53 use base::Error;
54 #[allow(unused_imports)]
55 use base::Event;
56 use base::FromRawDescriptor;
57 #[allow(unused_imports)]
58 use base::IoctlNr;
59 #[allow(unused_imports)]
60 use base::MappedRegion;
61 #[allow(unused_imports)]
62 use base::MemoryMapping;
63 #[allow(unused_imports)]
64 use base::MemoryMappingBuilder;
65 #[allow(unused_imports)]
66 use base::MmapError;
67 use base::RawDescriptor;
68 #[allow(unused_imports)]
69 use base::Result;
70 #[allow(unused_imports)]
71 use base::SIGRTMIN;
72 use data_model::vec_with_array_field;
73 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
74 use data_model::FlexibleArrayWrapper;
75 use kvm_sys::*;
76 use libc::open64;
77 use libc::sigset_t;
78 use libc::EBUSY;
79 use libc::EINVAL;
80 use libc::ENOENT;
81 use libc::ENOSPC;
82 use libc::EOVERFLOW;
83 use libc::O_CLOEXEC;
84 use libc::O_RDWR;
85 use sync::Mutex;
86 use vm_memory::GuestAddress;
87 use vm_memory::GuestMemory;
88 use vm_memory::MemoryRegionInformation;
89 
90 pub use crate::cap::*;
91 
errno_result<T>() -> Result<T>92 fn errno_result<T>() -> Result<T> {
93     Err(Error::last())
94 }
95 
/// Installs (or, when `memory_size` is 0, removes) a guest memory slot via the
/// `KVM_SET_USER_MEMORY_REGION` ioctl.
///
/// * `fd` - VM file descriptor the slot belongs to.
/// * `slot` - KVM memory slot number to set.
/// * `read_only` - Sets `KVM_MEM_READONLY` so guest writes exit to userspace instead.
/// * `log_dirty_pages` - Sets `KVM_MEM_LOG_DIRTY_PAGES` to enable dirty tracking for the slot.
/// * `guest_addr` - Guest physical address where the region starts.
/// * `memory_size` - Size of the region in bytes (0 deletes the slot).
/// * `userspace_addr` - Host virtual address backing the region.
///
/// # Safety
///
/// `userspace_addr` must point to at least `memory_size` bytes of host memory that remains
/// valid and mapped for as long as the slot stays installed, and the guest-physical range
/// must not overlap any other active slot.
unsafe fn set_user_memory_region<F: AsRawDescriptor>(
    fd: &F,
    slot: u32,
    read_only: bool,
    log_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    let region = kvm_userspace_memory_region {
        slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    // A zero return indicates success; anything else maps errno into an error.
    let ret = ioctl_with_ref(fd, KVM_SET_USER_MEMORY_REGION(), &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}
124 
125 /// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
126 /// size.
127 ///
128 /// # Arguments
129 ///
130 /// * `size` - Number of bytes in the memory region being queried.
dirty_log_bitmap_size(size: usize) -> usize131 pub fn dirty_log_bitmap_size(size: usize) -> usize {
132     let page_size = pagesize();
133     (((size + page_size - 1) / page_size) + 7) / 8
134 }
135 
/// A wrapper around opening and using `/dev/kvm`.
///
/// Useful for querying extensions and basic values from the KVM backend. A `Kvm` is required to
/// create a `Vm` object.
pub struct Kvm {
    // Owned handle to the opened KVM device node; closed on drop.
    kvm: File,
}
143 
impl Kvm {
    /// Opens `/dev/kvm/` and returns a Kvm object on success.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(&PathBuf::from("/dev/kvm"))
    }

    /// Opens a KVM device at `device_path` and returns a Kvm object on success.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // Safe because we verify that ret is valid and we own the fd.
        Ok(Kvm {
            kvm: unsafe { File::from_raw_descriptor(ret) },
        })
    }

    // Returns the raw KVM_CHECK_EXTENSION result: 0 when unsupported, otherwise a
    // positive, capability-specific value.
    fn check_extension_int(&self, c: Cap) -> i32 {
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) }
    }

    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.check_extension_int(c) == 1
    }

    /// Gets the size of the mmap required to use vcpu's `kvm_run` structure.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) };
        if res > 0 {
            Ok(res as usize)
        } else {
            errno_result()
        }
    }

    // Shared implementation for the supported/emulated CPUID queries; `kind` selects the ioctl.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        let ret = unsafe {
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nent, which is set to the allocated
            // size(MAX_KVM_CPUID_ENTRIES) above.
            ioctl_with_mut_ptr(self, kind, cpuid.as_mut_ptr())
        };
        if ret < 0 {
            return errno_result();
        }

        Ok(cpuid)
    }

    /// X86 specific call to get the system supported CPUID values
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_supported_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_SUPPORTED_CPUID())
    }

    /// X86 specific call to get the system emulated CPUID values
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_emulated_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_EMULATED_CPUID())
    }

    /// X86 specific call to get list of supported MSRS
    ///
    /// See the documentation for KVM_GET_MSR_INDEX_LIST.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_msr_index_list(&self) -> Result<Vec<u32>> {
        const MAX_KVM_MSR_ENTRIES: usize = 256;

        let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
        msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;

        let ret = unsafe {
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nmsrs, which is set to the allocated
            // size (MAX_KVM_MSR_ENTRIES) above.
            ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST(), &mut msr_list[0])
        };
        if ret < 0 {
            return errno_result();
        }

        let mut nmsrs = msr_list[0].nmsrs;

        // Mapping the unsized array to a slice is unsafe because the length isn't known.  Using
        // the length we originally allocated with eliminates the possibility of overflow.
        let indices: &[u32] = unsafe {
            if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
                nmsrs = MAX_KVM_MSR_ENTRIES as u32;
            }
            msr_list[0].indices.as_slice(nmsrs as usize)
        };

        Ok(indices.to_vec())
    }

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    // The x86 machine type is always 0
    pub fn get_vm_type(&self) -> c_ulong {
        0
    }

    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    // Compute the machine type, which should be the IPA range for the VM
    // Ideally, this would take a description of the memory map and return
    // the closest machine type for this VM. Here, we just return the maximum
    // the kernel support.
    #[allow(clippy::useless_conversion)]
    pub fn get_vm_type(&self) -> c_ulong {
        // Safe because we know self is a real kvm fd
        match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), KVM_CAP_ARM_VM_IPA_SIZE.into()) }
        {
            // Not supported? Use 0 as the machine type, which implies 40bit IPA
            ret if ret < 0 => 0,
            // Use the lower 8 bits representing the IPA space as the machine type
            ipa => (ipa & 0xff) as c_ulong,
        }
    }
}
273 
impl AsRawDescriptor for Kvm {
    // Exposes the underlying KVM fd so the `ioctl*` helpers can target it.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}
279 
/// An address either in programmable I/O space or in memory mapped I/O space.
#[derive(Copy, Clone, Debug)]
pub enum IoeventAddress {
    /// Port I/O address (x86 `in`/`out` space).
    Pio(u64),
    /// Memory-mapped I/O guest physical address.
    Mmio(u64),
}
286 
/// Used in `Vm::register_ioevent` to indicate a size and optionally value to match.
///
/// For the sized variants, `Some(v)` means only writes equal to `v` signal the event;
/// `None` matches any write of that width.
pub enum Datamatch {
    /// Match writes of any width and any value.
    AnyLength,
    U8(Option<u8>),
    U16(Option<u16>),
    U32(Option<u32>),
    U64(Option<u64>),
}
295 
/// A source of IRQs in an `IrqRoute`.
pub enum IrqSource {
    /// A pin on an in-kernel interrupt controller chip.
    Irqchip { chip: u32, pin: u32 },
    /// A message-signaled interrupt (address/data pair).
    Msi { address: u64, data: u32 },
}
301 
/// A single route for an IRQ.
pub struct IrqRoute {
    /// Global system interrupt number being routed.
    pub gsi: u32,
    /// Where the interrupt originates.
    pub source: IrqSource,
}
307 
/// Interrupt controller IDs
///
/// Discriminant values are the `chip_id` values passed in `kvm_irqchip`.
pub enum PicId {
    Primary = 0,
    Secondary = 1,
}
313 
/// Number of pins on the IOAPIC.
// NOTE(review): presumably mirrors the kernel's IOAPIC pin count — confirm against kvm headers.
pub const NUM_IOAPIC_PINS: usize = 24;
316 
// Used to invert the order when stored in a max-heap.
// Wraps a KVM memory slot number; see the `Ord` impl below for the inverted ordering.
#[derive(Copy, Clone, Eq, PartialEq)]
struct MemSlot(u32);
320 
321 impl Ord for MemSlot {
cmp(&self, other: &MemSlot) -> Ordering322     fn cmp(&self, other: &MemSlot) -> Ordering {
323         // Notice the order is inverted so the lowest magnitude slot has the highest priority in a
324         // max-heap.
325         other.0.cmp(&self.0)
326     }
327 }
328 
329 impl PartialOrd for MemSlot {
partial_cmp(&self, other: &MemSlot) -> Option<Ordering>330     fn partial_cmp(&self, other: &MemSlot) -> Option<Ordering> {
331         Some(self.cmp(other))
332     }
333 }
334 
/// A wrapper around creating and using a VM.
pub struct Vm {
    // Owned VM fd returned by KVM_CREATE_VM.
    vm: File,
    // Guest RAM registered at construction time; occupies the lowest slot numbers.
    guest_mem: GuestMemory,
    // Extra regions added via `add_memory_region`, keyed by slot number.
    mem_regions: Arc<Mutex<BTreeMap<u32, Box<dyn MappedRegion>>>>,
    // Max-heap (via inverted `MemSlot` ordering) of freed slot numbers available for reuse.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<MemSlot>>>,
}
342 
343 impl Vm {
    /// Constructs a new `Vm` using the given `Kvm` instance.
    ///
    /// Registers every region of `guest_mem` with the new VM; those regions take
    /// slots `0..num_regions`, so dynamically added regions start above them.
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory) -> Result<Vm> {
        // Safe because we know kvm is a real kvm fd as this module is the only one that can make
        // Kvm objects.
        let ret = unsafe { ioctl_with_val(kvm, KVM_CREATE_VM(), kvm.get_vm_type()) };
        if ret >= 0 {
            // Safe because we verify the value of ret and we are the owners of the fd.
            let vm_file = unsafe { File::from_raw_descriptor(ret) };
            guest_mem.with_regions(
                |MemoryRegionInformation {
                     index,
                     guest_addr,
                     size,
                     host_addr,
                     ..
                 }| {
                    unsafe {
                        // Safe because the guest regions are guaranteed not to overlap.
                        set_user_memory_region(
                            &vm_file,
                            index as u32,
                            false,
                            false,
                            guest_addr.offset() as u64,
                            size as u64,
                            host_addr as *mut u8,
                        )
                    }
                },
            )?;

            Ok(Vm {
                vm: vm_file,
                guest_mem,
                mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
                mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            })
        } else {
            errno_result()
        }
    }
385 
386     /// Checks if a particular `Cap` is available.
387     ///
388     /// This is distinct from the `Kvm` version of this method because the some extensions depend on
389     /// the particular `Vm` existence. This method is encouraged by the kernel because it more
390     /// accurately reflects the usable capabilities.
check_extension(&self, c: Cap) -> bool391     pub fn check_extension(&self, c: Cap) -> bool {
392         // Safe because we know that our file is a KVM fd and that the extension is one of the ones
393         // defined by kernel.
394         unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) == 1 }
395     }
396 
    /// Inserts the given `mem` into the VM's address space at `guest_addr`.
    ///
    /// The slot that was assigned the kvm memory mapping is returned on success. The slot can be
    /// given to `Vm::remove_memory_region` to remove the memory from the VM's address space and
    /// take back ownership of `mem`.
    ///
    /// Note that memory inserted into the VM's address space must not overlap with any other memory
    /// slot's region.
    ///
    /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
    /// write will trigger a mmio VM exit, leaving the memory untouched.
    ///
    /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
    /// by the guest with `get_dirty_log`.
    pub fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
    ) -> Result<u32> {
        let size = mem.size() as u64;
        // EOVERFLOW: the region would wrap past the end of the guest address space.
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        // ENOSPC: the requested range collides with the VM's base guest memory.
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        // Reuse the smallest previously freed slot if any; otherwise allocate the next
        // slot after the base guest memory regions and every currently tracked region.
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as u32,
        };

        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                log_dirty_pages,
                guest_addr.offset() as u64,
                size,
                mem.as_ptr(),
            )
        };

        // On failure, return the slot number to the gap heap so it can be reused.
        if let Err(e) = res {
            gaps.push(MemSlot(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }
455 
    /// Removes memory that was previously added at the given slot.
    ///
    /// Ownership of the host memory mapping associated with the given slot is returned on success.
    ///
    /// Returns `ENOENT` if `slot` was never added (or was already removed).
    pub fn remove_memory_region(&mut self, slot: u32) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // Safe because the slot is checked against the list of memory slots.
        // A zero-sized region deletes the slot from KVM; this must happen before the
        // mapping is handed back to the caller (and potentially unmapped).
        unsafe {
            set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?;
        }
        // Record the freed slot number for reuse by `add_memory_region`.
        self.mem_slot_gaps.lock().push(MemSlot(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }
472 
473     /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
474     /// `slot`.
475     ///
476     /// The size of `dirty_log` must be at least as many bits as there are pages in the memory
477     /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
478     /// be 2 bytes or greater.
get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()>479     pub fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
480         match self.mem_regions.lock().get(&slot) {
481             Some(mem) => {
482                 // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
483                 if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
484                     return Err(Error::new(EINVAL));
485                 }
486                 let mut dirty_log_kvm = kvm_dirty_log {
487                     slot,
488                     ..Default::default()
489                 };
490                 dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
491                 // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid
492                 // (because it's from a slice) and we checked that it will be large enough to hold
493                 // the entire log.
494                 let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirty_log_kvm) };
495                 if ret == 0 {
496                     Ok(())
497                 } else {
498                     errno_result()
499                 }
500             }
501             _ => Err(Error::new(ENOENT)),
502         }
503     }
504 
    /// Gets a reference to the guest memory owned by this VM.
    ///
    /// Note that `GuestMemory` does not include any mmio memory that may have been added after
    /// this VM was constructed.
    pub fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }
512 
    /// Sets the address of a one-page region in the VM's address space.
    ///
    /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
        // Safe because we know that our file is a VM fd and we verify the return result.
        // The ioctl takes a plain u64 guest physical address by reference.
        let ret =
            unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &(addr.offset() as u64)) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
527 
    /// Retrieves the current timestamp of kvmclock as seen by the current guest.
    ///
    /// See the documentation on the KVM_GET_CLOCK ioctl.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_clock(&self) -> Result<kvm_clock_data> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write
        // correct amount of memory to our pointer, and we verify the return result.
        // Zeroing is valid here: kvm_clock_data is a plain-old-data bindgen struct.
        let mut clock_data = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock_data) };
        if ret == 0 {
            Ok(clock_data)
        } else {
            errno_result()
        }
    }
543 
    /// Sets the current timestamp of kvmclock to the specified value.
    ///
    /// See the documentation on the KVM_SET_CLOCK ioctl.
    ///
    /// * `clock_data` - Clock state, typically obtained from an earlier `get_clock` call.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_clock(&self, clock_data: &kvm_clock_data) -> Result<()> {
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), clock_data) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
558 
    /// Creates an in kernel interrupt controller.
    ///
    /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
    #[cfg(any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "arm",
        target_arch = "aarch64"
    ))]
    pub fn create_irq_chip(&self) -> Result<()> {
        // Safe because we know that our file is a VM fd and we verify the return result.
        // This ioctl takes no argument payload.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
577 
    /// Retrieves the state of given interrupt controller by issuing KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    ///
    /// * `id` - Which PIC (primary or secondary) to query; its value becomes `chip_id`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_pic_state(&self, id: PicId) -> Result<kvm_pic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // PIC (primary or secondary) and not IOAPIC, so reading the
                // `pic` arm of the union is the active variant.
                irqchip_state.chip.pic
            })
        } else {
            errno_result()
        }
    }
602 
    /// Sets the state of given interrupt controller by issuing KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    ///
    /// * `id` - Which PIC (primary or secondary) to update; its value becomes `chip_id`.
    /// * `state` - PIC register state to install.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_pic_state(&self, id: PicId, state: &kvm_pic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        // Writing the union's `pic` arm matches the chip_id selected above.
        irqchip_state.chip.pic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
622 
    /// Retrieves the state of IOAPIC by issuing KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
        // chip_id 2 selects the IOAPIC (0 and 1 are the primary/secondary PICs).
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // IOAPIC and not PIC, so the `ioapic` union arm is the active variant.
                irqchip_state.chip.ioapic
            })
        } else {
            errno_result()
        }
    }
647 
    /// Sets the state of IOAPIC by issuing KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    ///
    /// * `state` - IOAPIC register state to install.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
        // chip_id 2 selects the IOAPIC (0 and 1 are the primary/secondary PICs).
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        irqchip_state.chip.ioapic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
667 
    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    ///
    /// * `irq` - GSI number of the line to drive.
    /// * `active` - `true` asserts the line (level 1); `false` deasserts it (level 0).
    #[cfg(any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "arm",
        target_arch = "aarch64"
    ))]
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        // bool -> u32: true maps to 1, false to 0.
        irq_level.level = active.into();

        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
689 
    /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn create_pit(&self) -> Result<()> {
        // Default (all-zero flags) PIT configuration.
        let pit_config = kvm_pit_config::default();
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
705 
    /// Retrieves the state of PIT by issuing KVM_GET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write
        // correct amount of memory to our pointer, and we verify the return result.
        // Zeroing is valid here: kvm_pit_state2 is a plain-old-data bindgen struct.
        let mut pit_state = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pit_state) };
        if ret == 0 {
            Ok(pit_state)
        } else {
            errno_result()
        }
    }
721 
    /// Sets the state of PIT by issuing KVM_SET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    ///
    /// * `pit_state` - PIT state, typically obtained from an earlier `get_pit_state` call.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pit_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
736 
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
    /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
    /// and must match the expected size of the guest's write.
    ///
    /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
    /// triggered is prevented.
    pub fn register_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        // Delegates to the shared ioeventfd helper with deassign=false (assign).
        self.ioeventfd(evt, addr, datamatch, false)
    }
753 
    /// Unregisters an event previously registered with `register_ioevent`.
    ///
    /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
    /// `register_ioevent`.
    pub fn unregister_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        // Delegates to the shared ioeventfd helper with deassign=true (unassign).
        self.ioeventfd(evt, addr, datamatch, true)
    }
766 
ioeventfd( &self, evt: &Event, addr: IoeventAddress, datamatch: Datamatch, deassign: bool, ) -> Result<()>767     fn ioeventfd(
768         &self,
769         evt: &Event,
770         addr: IoeventAddress,
771         datamatch: Datamatch,
772         deassign: bool,
773     ) -> Result<()> {
774         let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
775             Datamatch::AnyLength => (false, 0, 0),
776             Datamatch::U8(v) => match v {
777                 Some(u) => (true, u as u64, 1),
778                 None => (false, 0, 1),
779             },
780             Datamatch::U16(v) => match v {
781                 Some(u) => (true, u as u64, 2),
782                 None => (false, 0, 2),
783             },
784             Datamatch::U32(v) => match v {
785                 Some(u) => (true, u as u64, 4),
786                 None => (false, 0, 4),
787             },
788             Datamatch::U64(v) => match v {
789                 Some(u) => (true, u as u64, 8),
790                 None => (false, 0, 8),
791             },
792         };
793         let mut flags = 0;
794         if deassign {
795             flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
796         }
797         if do_datamatch {
798             flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
799         }
800         if let IoeventAddress::Pio(_) = addr {
801             flags |= 1 << kvm_ioeventfd_flag_nr_pio;
802         }
803         let ioeventfd = kvm_ioeventfd {
804             datamatch: datamatch_value,
805             len: datamatch_len,
806             addr: match addr {
807                 IoeventAddress::Pio(p) => p as u64,
808                 IoeventAddress::Mmio(m) => m,
809             },
810             fd: evt.as_raw_descriptor(),
811             flags,
812             ..Default::default()
813         };
814         // Safe because we know that our file is a VM fd, we know the kernel will only read the
815         // correct amount of memory from our pointer, and we verify the return result.
816         let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
817         if ret == 0 {
818             Ok(())
819         } else {
820             errno_result()
821         }
822     }
823 
824     /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt` will
825     /// get triggered when the irqchip is resampled.
826     #[cfg(any(
827         target_arch = "x86",
828         target_arch = "x86_64",
829         target_arch = "arm",
830         target_arch = "aarch64"
831     ))]
register_irqfd_resample( &self, evt: &Event, resample_evt: &Event, gsi: u32, ) -> Result<()>832     pub fn register_irqfd_resample(
833         &self,
834         evt: &Event,
835         resample_evt: &Event,
836         gsi: u32,
837     ) -> Result<()> {
838         let irqfd = kvm_irqfd {
839             flags: KVM_IRQFD_FLAG_RESAMPLE,
840             fd: evt.as_raw_descriptor() as u32,
841             resamplefd: resample_evt.as_raw_descriptor() as u32,
842             gsi,
843             ..Default::default()
844         };
845         // Safe because we know that our file is a VM fd, we know the kernel will only read the
846         // correct amount of memory from our pointer, and we verify the return result.
847         let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
848         if ret == 0 {
849             Ok(())
850         } else {
851             errno_result()
852         }
853     }
854 
855     /// Unregisters an event that was previously registered with
856     /// `register_irqfd`/`register_irqfd_resample`.
857     ///
858     /// The `evt` and `gsi` pair must be the same as the ones passed into
859     /// `register_irqfd`/`register_irqfd_resample`.
860     #[cfg(any(
861         target_arch = "x86",
862         target_arch = "x86_64",
863         target_arch = "arm",
864         target_arch = "aarch64"
865     ))]
unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()>866     pub fn unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()> {
867         let irqfd = kvm_irqfd {
868             fd: evt.as_raw_descriptor() as u32,
869             gsi,
870             flags: KVM_IRQFD_FLAG_DEASSIGN,
871             ..Default::default()
872         };
873         // Safe because we know that our file is a VM fd, we know the kernel will only read the
874         // correct amount of memory from our pointer, and we verify the return result.
875         let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
876         if ret == 0 {
877             Ok(())
878         } else {
879             errno_result()
880         }
881     }
882 
    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.
    ///
    /// Builds a `kvm_irq_routing` header followed by one `kvm_irq_routing_entry`
    /// per element of `routes` (KVM's flexible-array ABI) and submits the whole
    /// table via the KVM_SET_GSI_ROUTING ioctl.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        // Allocate zeroed storage for the header plus `routes.len()` trailing
        // entries in one contiguous buffer.
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        // Safe because we ensured there is enough space in irq_routing to hold the number of
        // route entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            irq_route.gsi = route.gsi;
            match route.source {
                IrqSource::Irqchip { chip, pin } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_IRQCHIP;
                    irq_route.u.irqchip = kvm_irq_routing_irqchip { irqchip: chip, pin }
                }
                IrqSource::Msi { address, data } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_MSI;
                    irq_route.u.msi = kvm_irq_routing_msi {
                        // The 64-bit MSI address is split into low/high 32-bit
                        // halves as the kernel struct expects.
                        address_lo: address as u32,
                        address_hi: (address >> 32) as u32,
                        data,
                        ..Default::default()
                    }
                }
            }
        }

        // Safe because we know that our file is a VM fd, the kernel only reads the header and
        // the entries initialized above, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
920 
921     /// Enable the specified capability.
922     /// See documentation for KVM_ENABLE_CAP.
923     /// # Safety
924     /// This function is marked as unsafe because `cap` may contain values which are interpreted as
925     /// pointers by the kernel.
kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()>926     pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
927         // Safe because we allocated the struct and we know the kernel will read exactly the size of
928         // the struct.
929         let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
930         if ret < 0 {
931             errno_result()
932         } else {
933             Ok(())
934         }
935     }
936 }
937 
impl AsRawDescriptor for Vm {
    /// Returns the raw descriptor of the underlying VM file.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}
943 
/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
#[derive(Debug)]
pub enum VcpuExit {
    /// An out port instruction was run on the given port with the given data.
    IoOut {
        port: u16,
        size: usize,
        data: [u8; 8],
    },
    /// An in port instruction was run on the given port.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    IoIn {
        port: u16,
        size: usize,
    },
    /// A read instruction was run against the given MMIO address.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    MmioRead {
        address: u64,
        size: usize,
    },
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite {
        address: u64,
        size: usize,
        data: [u8; 8],
    },
    IoapicEoi {
        vector: u8,
    },
    HypervSynic {
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
    },
    /// A Hyper-V hypercall; the result should be set with `set_data` before `Vcpu::run` is called
    /// again.
    HypervHcall {
        input: u64,
        params: [u64; 2],
    },
    Unknown,
    Exception,
    Hypercall,
    Debug,
    Hlt,
    IrqWindowOpen,
    Shutdown,
    FailEntry {
        hardware_entry_failure_reason: u64,
    },
    Intr,
    SetTpr,
    TprAccess,
    S390Sieic,
    S390Reset,
    Dcr,
    Nmi,
    InternalError,
    Osi,
    PaprHcall,
    S390Ucontrol,
    Watchdog,
    S390Tsch,
    Epr,
    /// The cpu triggered a system level event which is specified by the type field.
    /// The first field is the event type and the second field is flags.
    /// The possible event types are shutdown, reset, or crash.  So far there
    /// are not any flags defined.
    SystemEvent(u32 /* event_type */, u64 /* flags */),
}
1018 
/// A wrapper around creating and using a VCPU.
/// `Vcpu` provides all functionality except for running. To run, `to_runnable` must be called to
/// lock the vcpu to a thread. Then the returned `RunnableVcpu` can be used for running.
pub struct Vcpu {
    // The vcpu fd returned by KVM_CREATE_VCPU.
    vcpu: File,
    // Mapping of this vcpu's `kvm_run` structure, shared with the kernel.
    run_mmap: MemoryMapping,
}
1026 
/// Per-thread record letting signal handlers request an immediate exit of the
/// vcpu currently assigned to the thread.
pub struct VcpuThread {
    // Raw pointer into the assigned vcpu's `kvm_run` mapping.
    run: *mut kvm_run,
    // Signal that was blocked while assigning the vcpu, if any.
    signal_num: Option<c_int>,
}

// One slot per OS thread; written by `Vcpu::to_runnable` and read by
// `Vcpu::set_local_immediate_exit`.
thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = RefCell::new(None));
1033 
1034 impl Vcpu {
1035     /// Constructs a new VCPU for `vm`.
1036     ///
1037     /// The `id` argument is the CPU number between [0, max vcpus).
new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu>1038     pub fn new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu> {
1039         let run_mmap_size = kvm.get_vcpu_mmap_size()?;
1040 
1041         // Safe because we know that vm a VM fd and we verify the return result.
1042         let vcpu_fd = unsafe { ioctl_with_val(vm, KVM_CREATE_VCPU(), id) };
1043         if vcpu_fd < 0 {
1044             return errno_result();
1045         }
1046 
1047         // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
1048         // the value of the fd and we own the fd.
1049         let vcpu = unsafe { File::from_raw_descriptor(vcpu_fd) };
1050 
1051         let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
1052             .from_file(&vcpu)
1053             .build()
1054             .map_err(|_| Error::new(ENOSPC))?;
1055 
1056         Ok(Vcpu { vcpu, run_mmap })
1057     }
1058 
    /// Consumes `self` and returns a `RunnableVcpu`. A `RunnableVcpu` is required to run the
    /// guest.
    /// Assigns a vcpu to the current thread and stores it in a hash map that can be used by signal
    /// handlers to call set_local_immediate_exit(). An optional signal number will be temporarily
    /// blocked while assigning the vcpu to the thread and later blocked when `RunnableVcpu` is
    /// destroyed.
    ///
    /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
    #[allow(clippy::cast_ptr_alignment)]
    pub fn to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu> {
        // Block signal while we add -- if a signal fires (very unlikely,
        // as this means something is trying to pause the vcpu before it has
        // even started) it'll try to grab the read lock while this write
        // lock is grabbed and cause a deadlock.
        // Assuming that a failure to block means it's already blocked.
        let _blocked_signal = signal_num.map(BlockedSignal::new);

        VCPU_THREAD.with(|v| {
            if v.borrow().is_none() {
                // Publish this vcpu's kvm_run mapping so signal handlers on
                // this thread can flip the immediate-exit flag.
                *v.borrow_mut() = Some(VcpuThread {
                    run: self.run_mmap.as_ptr() as *mut kvm_run,
                    signal_num,
                });
                Ok(())
            } else {
                // This thread already has a vcpu assigned.
                Err(Error::new(EBUSY))
            }
        })?;

        Ok(RunnableVcpu {
            vcpu: self,
            phantom: Default::default(),
        })
    }
1093 
    /// Sets the data received by a mmio read, ioport in, or hypercall instruction.
    ///
    /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`,
    /// `VcpuExit::MmioRead`, or `VcpuExit::HypervHcall`.
    ///
    /// Returns `EINVAL` if the current exit reason does not consume data or if
    /// `data` has the wrong length for the pending access.
    #[allow(clippy::cast_ptr_alignment)]
    pub fn set_data(&self, data: &[u8]) -> Result<()> {
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        match run.exit_reason {
            KVM_EXIT_IO => {
                let run_start = run as *mut kvm_run as *mut u8;
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let io = unsafe { run.__bindgen_anon_1.io };
                // Only "in" accesses consume data from userspace.
                if io.direction as u32 != KVM_EXIT_IO_IN {
                    return Err(Error::new(EINVAL));
                }
                // The access may be a batch of `count` operations of `size`
                // bytes each; the caller must supply exactly that many bytes.
                let data_size = (io.count as usize) * (io.size as usize);
                if data_size != data.len() {
                    return Err(Error::new(EINVAL));
                }
                // The data_offset is defined by the kernel to be some number of bytes into the
                // kvm_run structure, which we have fully mmap'd.
                unsafe {
                    let data_ptr = run_start.offset(io.data_offset as isize);
                    copy_nonoverlapping(data.as_ptr(), data_ptr, data_size);
                }
                Ok(())
            }
            KVM_EXIT_MMIO => {
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
                // Only reads (is_write == 0) consume data from userspace.
                if mmio.is_write != 0 {
                    return Err(Error::new(EINVAL));
                }
                let len = mmio.len as usize;
                if len != data.len() {
                    return Err(Error::new(EINVAL));
                }
                mmio.data[..len].copy_from_slice(data);
                Ok(())
            }
            KVM_EXIT_HYPERV => {
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
                if hyperv.type_ != KVM_EXIT_HYPERV_HCALL {
                    return Err(Error::new(EINVAL));
                }
                let hcall = unsafe { &mut hyperv.u.hcall };
                // The hypercall result must be exactly 8 bytes (a u64).
                match data.try_into() {
                    Ok(data) => {
                        hcall.result = u64::from_ne_bytes(data);
                    }
                    _ => return Err(Error::new(EINVAL)),
                }
                Ok(())
            }
            _ => Err(Error::new(EINVAL)),
        }
    }
1158 
1159     /// Sets the bit that requests an immediate exit.
1160     #[allow(clippy::cast_ptr_alignment)]
set_immediate_exit(&self, exit: bool)1161     pub fn set_immediate_exit(&self, exit: bool) {
1162         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1163         // kernel told us how large it was. The pointer is page aligned so casting to a different
1164         // type is well defined, hence the clippy allow attribute.
1165         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1166         run.immediate_exit = exit.into();
1167     }
1168 
1169     /// Sets/clears the bit for immediate exit for the vcpu on the current thread.
set_local_immediate_exit(exit: bool)1170     pub fn set_local_immediate_exit(exit: bool) {
1171         VCPU_THREAD.with(|v| {
1172             if let Some(state) = &(*v.borrow()) {
1173                 unsafe {
1174                     (*state.run).immediate_exit = exit.into();
1175                 };
1176             }
1177         });
1178     }
1179 
1180     /// Gets the VCPU registers.
1181     #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
get_regs(&self) -> Result<kvm_regs>1182     pub fn get_regs(&self) -> Result<kvm_regs> {
1183         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1184         // correct amount of memory from our pointer, and we verify the return result.
1185         let mut regs = unsafe { std::mem::zeroed() };
1186         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
1187         if ret != 0 {
1188             return errno_result();
1189         }
1190         Ok(regs)
1191     }
1192 
1193     /// Sets the VCPU registers.
1194     #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
set_regs(&self, regs: &kvm_regs) -> Result<()>1195     pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
1196         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1197         // correct amount of memory from our pointer, and we verify the return result.
1198         let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) };
1199         if ret != 0 {
1200             return errno_result();
1201         }
1202         Ok(())
1203     }
1204 
1205     /// Gets the VCPU special registers.
1206     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_sregs(&self) -> Result<kvm_sregs>1207     pub fn get_sregs(&self) -> Result<kvm_sregs> {
1208         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1209         // correct amount of memory to our pointer, and we verify the return result.
1210         let mut regs = unsafe { std::mem::zeroed() };
1211         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
1212         if ret != 0 {
1213             return errno_result();
1214         }
1215         Ok(regs)
1216     }
1217 
1218     /// Sets the VCPU special registers.
1219     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_sregs(&self, sregs: &kvm_sregs) -> Result<()>1220     pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
1221         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1222         // correct amount of memory from our pointer, and we verify the return result.
1223         let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) };
1224         if ret != 0 {
1225             return errno_result();
1226         }
1227         Ok(())
1228     }
1229 
1230     /// Gets the VCPU FPU registers.
1231     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_fpu(&self) -> Result<kvm_fpu>1232     pub fn get_fpu(&self) -> Result<kvm_fpu> {
1233         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1234         // correct amount of memory to our pointer, and we verify the return result.
1235         let mut regs = unsafe { std::mem::zeroed() };
1236         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut regs) };
1237         if ret != 0 {
1238             return errno_result();
1239         }
1240         Ok(regs)
1241     }
1242 
1243     /// X86 specific call to setup the FPU
1244     ///
1245     /// See the documentation for KVM_SET_FPU.
1246     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_fpu(&self, fpu: &kvm_fpu) -> Result<()>1247     pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
1248         let ret = unsafe {
1249             // Here we trust the kernel not to read past the end of the kvm_fpu struct.
1250             ioctl_with_ref(self, KVM_SET_FPU(), fpu)
1251         };
1252         if ret < 0 {
1253             return errno_result();
1254         }
1255         Ok(())
1256     }
1257 
1258     /// Gets the VCPU debug registers.
1259     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_debugregs(&self) -> Result<kvm_debugregs>1260     pub fn get_debugregs(&self) -> Result<kvm_debugregs> {
1261         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1262         // correct amount of memory to our pointer, and we verify the return result.
1263         let mut regs = unsafe { std::mem::zeroed() };
1264         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut regs) };
1265         if ret != 0 {
1266             return errno_result();
1267         }
1268         Ok(regs)
1269     }
1270 
1271     /// Sets the VCPU debug registers
1272     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()>1273     pub fn set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()> {
1274         let ret = unsafe {
1275             // Here we trust the kernel not to read past the end of the kvm_fpu struct.
1276             ioctl_with_ref(self, KVM_SET_DEBUGREGS(), dregs)
1277         };
1278         if ret < 0 {
1279             return errno_result();
1280         }
1281         Ok(())
1282     }
1283 
1284     /// Gets the VCPU extended control registers
1285     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_xcrs(&self) -> Result<kvm_xcrs>1286     pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
1287         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1288         // correct amount of memory to our pointer, and we verify the return result.
1289         let mut regs = unsafe { std::mem::zeroed() };
1290         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut regs) };
1291         if ret != 0 {
1292             return errno_result();
1293         }
1294         Ok(regs)
1295     }
1296 
1297     /// Sets the VCPU extended control registers
1298     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()>1299     pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
1300         let ret = unsafe {
1301             // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
1302             ioctl_with_ref(self, KVM_SET_XCRS(), xcrs)
1303         };
1304         if ret < 0 {
1305             return errno_result();
1306         }
1307         Ok(())
1308     }
1309 
    /// X86 specific call to get the MSRS
    ///
    /// On success, the values of the requested MSRs are copied back into
    /// `msr_entries`, and the vector is truncated to the number of entries the
    /// kernel actually returned.
    ///
    /// See the documentation for KVM_GET_MSRS.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()> {
        // Build a kvm_msrs header with a trailing flexible array big enough
        // for all requested entries.
        let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(msr_entries.len());
        unsafe {
            // Mapping the unsized array to a slice is unsafe because the length isn't known.
            // Providing the length used to create the struct guarantees the entire slice is valid.
            let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(msr_entries.len());
            entries.copy_from_slice(msr_entries);
        }
        msrs[0].nmsrs = msr_entries.len() as u32;
        let ret = unsafe {
            // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
            ioctl_with_ref(self, KVM_GET_MSRS(), &msrs[0])
        };
        if ret < 0 {
            // KVM_GET_MSRS returns the number of msr entries read on success.
            return errno_result();
        }
        // Copy the values the kernel filled in back into the caller's vector.
        unsafe {
            let count = ret as usize;
            assert!(count <= msr_entries.len());
            let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(count);
            msr_entries.truncate(count);
            msr_entries.copy_from_slice(entries);
        }
        Ok(())
    }
1340 
1341     /// X86 specific call to setup the MSRS
1342     ///
1343     /// See the documentation for KVM_SET_MSRS.
1344     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_msrs(&self, msrs: &kvm_msrs) -> Result<()>1345     pub fn set_msrs(&self, msrs: &kvm_msrs) -> Result<()> {
1346         let ret = unsafe {
1347             // Here we trust the kernel not to read past the end of the kvm_msrs struct.
1348             ioctl_with_ref(self, KVM_SET_MSRS(), msrs)
1349         };
1350         if ret < 0 {
1351             // KVM_SET_MSRS actually returns the number of msr entries written.
1352             return errno_result();
1353         }
1354         Ok(())
1355     }
1356 
1357     /// X86 specific call to setup the CPUID registers
1358     ///
1359     /// See the documentation for KVM_SET_CPUID2.
1360     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_cpuid2(&self, cpuid: &CpuId) -> Result<()>1361     pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
1362         let ret = unsafe {
1363             // Here we trust the kernel not to read past the end of the kvm_msrs struct.
1364             ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_ptr())
1365         };
1366         if ret < 0 {
1367             return errno_result();
1368         }
1369         Ok(())
1370     }
1371 
1372     /// X86 specific call to get the system emulated hyper-v CPUID values
1373     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_hyperv_cpuid(&self) -> Result<CpuId>1374     pub fn get_hyperv_cpuid(&self) -> Result<CpuId> {
1375         const MAX_KVM_CPUID_ENTRIES: usize = 256;
1376         let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);
1377 
1378         let ret = unsafe {
1379             // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
1380             // allocated for the struct. The limit is read from nent, which is set to the allocated
1381             // size(MAX_KVM_CPUID_ENTRIES) above.
1382             ioctl_with_mut_ptr(self, KVM_GET_SUPPORTED_HV_CPUID(), cpuid.as_mut_ptr())
1383         };
1384         if ret < 0 {
1385             return errno_result();
1386         }
1387         Ok(cpuid)
1388     }
1389 
1390     /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt Controller".
1391     ///
1392     /// See the documentation for KVM_GET_LAPIC.
1393     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_lapic(&self) -> Result<kvm_lapic_state>1394     pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
1395         let mut klapic: kvm_lapic_state = Default::default();
1396 
1397         let ret = unsafe {
1398             // The ioctl is unsafe unless you trust the kernel not to write past the end of the
1399             // local_apic struct.
1400             ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic)
1401         };
1402         if ret < 0 {
1403             return errno_result();
1404         }
1405         Ok(klapic)
1406     }
1407 
1408     /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt Controller".
1409     ///
1410     /// See the documentation for KVM_SET_LAPIC.
1411     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()>1412     pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
1413         let ret = unsafe {
1414             // The ioctl is safe because the kernel will only read from the klapic struct.
1415             ioctl_with_ref(self, KVM_SET_LAPIC(), klapic)
1416         };
1417         if ret < 0 {
1418             return errno_result();
1419         }
1420         Ok(())
1421     }
1422 
1423     /// Gets the vcpu's current "multiprocessing state".
1424     ///
1425     /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
1426     /// a call to `Vm::create_irq_chip`.
1427     ///
1428     /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1429     /// to run crosvm on s390.
1430     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_mp_state(&self) -> Result<kvm_mp_state>1431     pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
1432         // Safe because we know that our file is a VCPU fd, we know the kernel will only
1433         // write correct amount of memory to our pointer, and we verify the return result.
1434         let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
1435         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut state) };
1436         if ret < 0 {
1437             return errno_result();
1438         }
1439         Ok(state)
1440     }
1441 
1442     /// Sets the vcpu's current "multiprocessing state".
1443     ///
1444     /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
1445     /// a call to `Vm::create_irq_chip`.
1446     ///
1447     /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1448     /// to run crosvm on s390.
1449     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_mp_state(&self, state: &kvm_mp_state) -> Result<()>1450     pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
1451         let ret = unsafe {
1452             // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
1453             ioctl_with_ref(self, KVM_SET_MP_STATE(), state)
1454         };
1455         if ret < 0 {
1456             return errno_result();
1457         }
1458         Ok(())
1459     }
1460 
1461     /// Gets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1462     ///
1463     /// See the documentation for KVM_GET_VCPU_EVENTS.
1464     ///
1465     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
get_vcpu_events(&self) -> Result<kvm_vcpu_events>1466     pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
1467         // Safe because we know that our file is a VCPU fd, we know the kernel
1468         // will only write correct amount of memory to our pointer, and we
1469         // verify the return result.
1470         let mut events: kvm_vcpu_events = unsafe { std::mem::zeroed() };
1471         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut events) };
1472         if ret < 0 {
1473             return errno_result();
1474         }
1475         Ok(events)
1476     }
1477 
1478     /// Sets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1479     ///
1480     /// See the documentation for KVM_SET_VCPU_EVENTS.
1481     ///
1482     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()>1483     pub fn set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()> {
1484         let ret = unsafe {
1485             // The ioctl is safe because the kernel will only read from the
1486             // kvm_vcpu_events.
1487             ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), events)
1488         };
1489         if ret < 0 {
1490             return errno_result();
1491         }
1492         Ok(())
1493     }
1494 
1495     /// Enable the specified capability.
1496     /// See documentation for KVM_ENABLE_CAP.
1497     /// # Safety
1498     /// This function is marked as unsafe because `cap` may contain values which are interpreted as
1499     /// pointers by the kernel.
kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()>1500     pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
1501         // Safe because we allocated the struct and we know the kernel will read exactly the size of
1502         // the struct.
1503         let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
1504         if ret < 0 {
1505             return errno_result();
1506         }
1507         Ok(())
1508     }
1509 
1510     /// Specifies set of signals that are blocked during execution of KVM_RUN.
1511     /// Signals that are not blocked will cause KVM_RUN to return with -EINTR.
1512     ///
1513     /// See the documentation for KVM_SET_SIGNAL_MASK
set_signal_mask(&self, signals: &[c_int]) -> Result<()>1514     pub fn set_signal_mask(&self, signals: &[c_int]) -> Result<()> {
1515         let sigset = signal::create_sigset(signals)?;
1516 
1517         let mut kvm_sigmask = vec_with_array_field::<kvm_signal_mask, sigset_t>(1);
1518         // Rust definition of sigset_t takes 128 bytes, but the kernel only
1519         // expects 8-bytes structure, so we can't write
1520         // kvm_sigmask.len  = size_of::<sigset_t>() as u32;
1521         kvm_sigmask[0].len = 8;
1522         // Ensure the length is not too big.
1523         const _ASSERT: usize = size_of::<sigset_t>() - 8usize;
1524 
1525         // Safe as we allocated exactly the needed space
1526         unsafe {
1527             copy_nonoverlapping(
1528                 &sigset as *const sigset_t as *const u8,
1529                 kvm_sigmask[0].sigset.as_mut_ptr(),
1530                 8,
1531             );
1532         }
1533 
1534         let ret = unsafe {
1535             // The ioctl is safe because the kernel will only read from the
1536             // kvm_signal_mask structure.
1537             ioctl_with_ref(self, KVM_SET_SIGNAL_MASK(), &kvm_sigmask[0])
1538         };
1539         if ret < 0 {
1540             return errno_result();
1541         }
1542         Ok(())
1543     }
1544 
1545     /// Sets the value of one register on this VCPU.  The id of the register is
1546     /// encoded as specified in the kernel documentation for KVM_SET_ONE_REG.
1547     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
set_one_reg(&self, reg_id: u64, data: u64) -> Result<()>1548     pub fn set_one_reg(&self, reg_id: u64, data: u64) -> Result<()> {
1549         let data_ref = &data as *const u64;
1550         let onereg = kvm_one_reg {
1551             id: reg_id,
1552             addr: data_ref as u64,
1553         };
1554         // safe because we allocated the struct and we know the kernel will read
1555         // exactly the size of the struct
1556         let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) };
1557         if ret < 0 {
1558             return errno_result();
1559         }
1560         Ok(())
1561     }
1562 }
1563 
impl AsRawDescriptor for Vcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        // Delegate to the wrapped vcpu descriptor so the safe wrapper can be used
        // directly in ioctl calls.
        self.vcpu.as_raw_descriptor()
    }
}
1569 
/// A Vcpu that has a thread and can be run. Created by calling `to_runnable` on a `Vcpu`.
/// Implements `Deref` to a `Vcpu` so all `Vcpu` methods are usable, with the addition of the `run`
/// function to execute the guest.
pub struct RunnableVcpu {
    // The underlying Vcpu; exposed through the Deref/DerefMut impls below.
    vcpu: Vcpu,
    // vcpus must stay on the same thread once they start.
    // Add the PhantomData pointer to ensure RunnableVcpu is not `Send`.
    phantom: std::marker::PhantomData<*mut u8>,
}
1579 
impl RunnableVcpu {
    /// Runs the VCPU until it exits, returning the reason for the exit.
    ///
    /// Note that the state of the VCPU and associated VM must be setup first for this to do
    /// anything useful.
    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
    // allow attribute.
    pub fn run(&self) -> Result<VcpuExit> {
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN()) };
        if ret == 0 {
            // Safe because we know we mapped enough memory to hold the kvm_run struct because the
            // kernel told us how large it was.
            let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
            // Translate the kernel-reported exit reason into a `VcpuExit` variant.
            match run.exit_reason {
                KVM_EXIT_IO => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let io = unsafe { run.__bindgen_anon_1.io };
                    let port = io.port;
                    // Total transfer size in bytes: `count` accesses of `size` bytes each.
                    let size = (io.count as usize) * (io.size as usize);
                    match io.direction as u32 {
                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
                        KVM_EXIT_IO_OUT => {
                            let mut data = [0; 8];
                            let run_start = run as *const kvm_run as *const u8;
                            // The data_offset is defined by the kernel to be some number of bytes
                            // into the kvm_run structure, which we have fully mmap'd.
                            unsafe {
                                let data_ptr = run_start.offset(io.data_offset as isize);
                                // Copy at most 8 bytes; anything past the fixed-size
                                // `data` buffer is truncated.
                                copy_nonoverlapping(
                                    data_ptr,
                                    data.as_mut_ptr(),
                                    min(size, data.len()),
                                );
                            }
                            Ok(VcpuExit::IoOut { port, size, data })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_MMIO => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let mmio = unsafe { &run.__bindgen_anon_1.mmio };
                    let address = mmio.phys_addr;
                    // Clamp the reported length to the fixed-size data array.
                    let size = min(mmio.len as usize, mmio.data.len());
                    if mmio.is_write != 0 {
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data: mmio.data,
                        })
                    } else {
                        Ok(VcpuExit::MmioRead { address, size })
                    }
                }
                KVM_EXIT_IOAPIC_EOI => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
                    Ok(VcpuExit::IoapicEoi { vector })
                }
                KVM_EXIT_HYPERV => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hyperv = unsafe { &run.__bindgen_anon_1.hyperv };
                    // Hyper-V exits carry a nested type tag selecting the inner union field.
                    match hyperv.type_ as u32 {
                        KVM_EXIT_HYPERV_SYNIC => {
                            let synic = unsafe { &hyperv.u.synic };
                            Ok(VcpuExit::HypervSynic {
                                msr: synic.msr,
                                control: synic.control,
                                evt_page: synic.evt_page,
                                msg_page: synic.msg_page,
                            })
                        }
                        KVM_EXIT_HYPERV_HCALL => {
                            let hcall = unsafe { &hyperv.u.hcall };
                            Ok(VcpuExit::HypervHcall {
                                input: hcall.input,
                                params: hcall.params,
                            })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
                KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
                KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
                KVM_EXIT_FAIL_ENTRY => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hardware_entry_failure_reason = unsafe {
                        run.__bindgen_anon_1
                            .fail_entry
                            .hardware_entry_failure_reason
                    };
                    Ok(VcpuExit::FailEntry {
                        hardware_entry_failure_reason,
                    })
                }
                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
                KVM_EXIT_SYSTEM_EVENT => {
                    // Safe because we know the exit reason told us this union
                    // field is valid
                    let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                    let event_flags =
                        unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                    Ok(VcpuExit::SystemEvent(event_type, event_flags))
                }
                // Any reason we don't recognize is a programming error somewhere (the
                // kernel handed back an exit code this wrapper has no mapping for).
                r => panic!("unknown kvm exit reason: {}", r),
            }
        } else {
            // KVM_RUN itself failed; surface the errno to the caller.
            errno_result()
        }
    }
}
1716 
impl Deref for RunnableVcpu {
    type Target = Vcpu;
    fn deref(&self) -> &Self::Target {
        // Expose all `Vcpu` methods on a `RunnableVcpu` by delegating to the
        // wrapped vcpu.
        &self.vcpu
    }
}
1723 
impl DerefMut for RunnableVcpu {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Mutable counterpart of the Deref impl above; same delegation.
        &mut self.vcpu
    }
}
1729 
impl AsRawDescriptor for RunnableVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        // Delegate to the inner Vcpu's descriptor (used by the KVM_RUN ioctl).
        self.vcpu.as_raw_descriptor()
    }
}
1735 
impl Drop for RunnableVcpu {
    fn drop(&mut self) {
        // NOTE(review): VCPU_THREAD appears to be a thread-local holding per-thread
        // vcpu run state (it has an optional `signal_num`) — confirm its definition
        // elsewhere in this file.
        VCPU_THREAD.with(|v| {
            // This assumes that a failure in `BlockedSignal::new` means the signal is already
            // blocked and therefore it should not be unblocked on exit.
            let _blocked_signal = &(*v.borrow())
                .as_ref()
                .and_then(|state| state.signal_num)
                .map(BlockedSignal::new);

            // Clear the thread-local state now that this vcpu is gone.
            *v.borrow_mut() = None;
        });
    }
}
1750 
/// Wrapper for kvm_cpuid2 which has a zero length array at the end.
/// Hides the zero length array behind a bounds check.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub type CpuId = FlexibleArrayWrapper<kvm_cpuid2, kvm_cpuid_entry2>;
1755