// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! A safe wrapper around the kernel's KVM interface.
//!
//! New code should use the `hypervisor` crate instead.

#![cfg(any(target_os = "android", target_os = "linux"))]

mod cap;

use std::cell::RefCell;
use std::cmp::min;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::ffi::CString;
use std::fs::File;
use std::mem::size_of;
use std::ops::Deref;
use std::ops::DerefMut;
use std::os::raw::*;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::path::PathBuf;
use std::ptr::copy_nonoverlapping;
use std::sync::Arc;

#[allow(unused_imports)]
use base::ioctl;
#[allow(unused_imports)]
use base::ioctl_with_mut_ptr;
#[allow(unused_imports)]
use base::ioctl_with_mut_ref;
#[allow(unused_imports)]
use base::ioctl_with_ptr;
#[allow(unused_imports)]
use base::ioctl_with_ref;
#[allow(unused_imports)]
use base::ioctl_with_val;
#[allow(unused_imports)]
use base::pagesize;
#[allow(unused_imports)]
use base::signal;
use base::sys::BlockedSignal;
#[allow(unused_imports)]
use base::unblock_signal;
#[allow(unused_imports)]
use base::warn;
use base::AsRawDescriptor;
#[allow(unused_imports)]
use base::Error;
#[allow(unused_imports)]
use base::Event;
use base::FromRawDescriptor;
#[allow(unused_imports)]
use base::IoctlNr;
#[allow(unused_imports)]
use base::MappedRegion;
#[allow(unused_imports)]
use base::MemoryMapping;
#[allow(unused_imports)]
use base::MemoryMappingBuilder;
#[allow(unused_imports)]
use base::MmapError;
use base::RawDescriptor;
#[allow(unused_imports)]
use base::Result;
#[allow(unused_imports)]
use base::SIGRTMIN;
use data_model::vec_with_array_field;
#[cfg(target_arch = "x86_64")]
use data_model::FlexibleArrayWrapper;
use kvm_sys::*;
use libc::open64;
use libc::sigset_t;
use libc::EBUSY;
use libc::EINVAL;
use libc::ENOENT;
use libc::ENOSPC;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;

pub use crate::cap::*;

fn errno_result<T>() -> Result<T> {
    Err(Error::last())
}

unsafe fn set_user_memory_region<F: AsRawDescriptor>(
    fd: &F,
    slot: u32,
    read_only: bool,
    log_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    let region = kvm_userspace_memory_region {
        slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(fd, KVM_SET_USER_MEMORY_REGION(), &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
/// size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
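///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// host page size of 4096 bytes: a 16-page region needs 16 bits of bitmap,
/// rounded up to 2 bytes.
///
/// ```ignore
/// let bitmap_bytes = dirty_log_bitmap_size(16 * 4096);
/// assert_eq!(bitmap_bytes, 2);
/// ```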
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    (((size + page_size - 1) / page_size) + 7) / 8
}

/// A wrapper around opening and using `/dev/kvm`.
///
/// Useful for querying extensions and basic values from the KVM backend. A `Kvm` is required to
/// create a `Vm` object.
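///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source):
///
/// ```ignore
/// let kvm = Kvm::new()?;
/// // Query a basic value from the backend, e.g. the size of the `kvm_run`
/// // mmap needed by each vcpu.
/// let mmap_size = kvm.get_vcpu_mmap_size()?;
/// ```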
pub struct Kvm {
    kvm: File,
}

impl Kvm {
    /// Opens `/dev/kvm` and returns a Kvm object on success.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(&PathBuf::from("/dev/kvm"))
    }

    /// Opens a KVM device at `device_path` and returns a Kvm object on success.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        Ok(Kvm {
            kvm: {
                // SAFETY:
                // Safe because we verify that ret is valid and we own the fd.
                unsafe { File::from_raw_descriptor(ret) }
            },
        })
    }

    fn check_extension_int(&self, c: Cap) -> i32 {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by the kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) }
    }

    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.check_extension_int(c) == 1
    }

    /// Gets the size of the mmap required to use a vcpu's `kvm_run` structure.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) };
        if res > 0 {
            Ok(res as usize)
        } else {
            errno_result()
        }
    }

    #[cfg(target_arch = "x86_64")]
    fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nent, which is set to the allocated
        // size (MAX_KVM_CPUID_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ptr(self, kind, cpuid.as_mut_ptr()) };
        if ret < 0 {
            return errno_result();
        }

        Ok(cpuid)
    }

    /// X86 specific call to get the system supported CPUID values.
    #[cfg(target_arch = "x86_64")]
    pub fn get_supported_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_SUPPORTED_CPUID())
    }

    /// X86 specific call to get the system emulated CPUID values.
    #[cfg(target_arch = "x86_64")]
    pub fn get_emulated_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_EMULATED_CPUID())
    }

    /// X86 specific call to get the list of supported MSRs.
    ///
    /// See the documentation for KVM_GET_MSR_INDEX_LIST.
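    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source):
    ///
    /// ```ignore
    /// let kvm = Kvm::new()?;
    /// let indices = kvm.get_msr_index_list()?;
    /// println!("KVM supports {} MSRs", indices.len());
    /// ```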
    #[cfg(target_arch = "x86_64")]
    pub fn get_msr_index_list(&self) -> Result<Vec<u32>> {
        const MAX_KVM_MSR_ENTRIES: usize = 256;

        let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
        msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nmsrs, which is set to the allocated
        // size (MAX_KVM_MSR_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST(), &mut msr_list[0]) };
        if ret < 0 {
            return errno_result();
        }

        let mut nmsrs = msr_list[0].nmsrs;

        // SAFETY:
        // Mapping the unsized array to a slice is unsafe because the length isn't known. Using
        // the length we originally allocated with eliminates the possibility of overflow.
        let indices: &[u32] = unsafe {
            if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
                nmsrs = MAX_KVM_MSR_ENTRIES as u32;
            }
            msr_list[0].indices.as_slice(nmsrs as usize)
        };

        Ok(indices.to_vec())
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
    // The x86 and riscv machine type is always 0.
    pub fn get_vm_type(&self) -> c_ulong {
        0
    }

    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    // Compute the machine type, which should be the IPA range for the VM.
    // Ideally, this would take a description of the memory map and return
    // the closest machine type for this VM. Here, we just return the maximum
    // the kernel supports.
    #[allow(clippy::useless_conversion)]
    pub fn get_vm_type(&self) -> c_ulong {
        // SAFETY:
        // Safe because we know self is a real kvm fd.
        match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), KVM_CAP_ARM_VM_IPA_SIZE.into()) }
        {
            // Not supported? Use 0 as the machine type, which implies a 40-bit IPA.
            ret if ret < 0 => 0,
            // Use the lower 8 bits representing the IPA space as the machine type.
            ipa => (ipa & 0xff) as c_ulong,
        }
    }
}

impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}

/// An address either in programmable I/O space or in memory mapped I/O space.
#[derive(Copy, Clone, Debug)]
pub enum IoeventAddress {
    Pio(u64),
    Mmio(u64),
}

/// Used in `Vm::register_ioevent` to indicate a size and optionally a value to match.
pub enum Datamatch {
    AnyLength,
    U8(Option<u8>),
    U16(Option<u16>),
    U32(Option<u32>),
    U64(Option<u64>),
}

/// A source of IRQs in an `IrqRoute`.
pub enum IrqSource {
    Irqchip { chip: u32, pin: u32 },
    Msi { address: u64, data: u32 },
}

/// A single route for an IRQ.
pub struct IrqRoute {
    pub gsi: u32,
    pub source: IrqSource,
}

/// Interrupt controller IDs
pub enum PicId {
    Primary = 0,
    Secondary = 1,
}

/// Number of pins on the IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;

// Used to invert the order when stored in a max-heap.
#[derive(Copy, Clone, Eq, PartialEq)]
struct MemSlot(u32);

impl Ord for MemSlot {
    fn cmp(&self, other: &MemSlot) -> Ordering {
        // Notice the order is inverted so the lowest magnitude slot has the highest priority in a
        // max-heap.
        other.0.cmp(&self.0)
    }
}

impl PartialOrd for MemSlot {
    fn partial_cmp(&self, other: &MemSlot) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// A wrapper around creating and using a VM.
pub struct Vm {
    vm: File,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<u32, Box<dyn MappedRegion>>>>,
    mem_slot_gaps: Arc<Mutex<BinaryHeap<MemSlot>>>,
}

impl Vm {
    /// Constructs a new `Vm` using the given `Kvm` instance.
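    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source); it
    /// assumes `GuestMemory::new` accepts a slice of `(GuestAddress, size)`
    /// ranges, as the `vm_memory` crate provides:
    ///
    /// ```ignore
    /// let kvm = Kvm::new()?;
    /// // One 64 MiB region of guest RAM starting at guest physical address 0.
    /// let guest_mem = GuestMemory::new(&[(GuestAddress(0), 64 << 20)]).unwrap();
    /// let vm = Vm::new(&kvm, guest_mem)?;
    /// ```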
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory) -> Result<Vm> {
        // SAFETY:
        // Safe because we know kvm is a real kvm fd as this module is the only one that can make
        // Kvm objects.
        let ret = unsafe { ioctl_with_val(kvm, KVM_CREATE_VM(), kvm.get_vm_type()) };
        if ret >= 0 {
            // SAFETY:
            // Safe because we verify the value of ret and we are the owners of the fd.
            let vm_file = unsafe { File::from_raw_descriptor(ret) };
            for region in guest_mem.regions() {
                // SAFETY:
                // Safe because the guest regions are guaranteed not to overlap.
                unsafe {
                    set_user_memory_region(
                        &vm_file,
                        region.index as u32,
                        false,
                        false,
                        region.guest_addr.offset(),
                        region.size as u64,
                        region.host_addr as *mut u8,
                    )
                }?;
            }

            Ok(Vm {
                vm: vm_file,
                guest_mem,
                mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
                mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            })
        } else {
            errno_result()
        }
    }

    /// Checks if a particular `Cap` is available.
    ///
    /// This is distinct from the `Kvm` version of this method because some extensions depend on
    /// the existence of the particular `Vm`. This method is encouraged by the kernel because it
    /// more accurately reflects the usable capabilities.
    pub fn check_extension(&self, c: Cap) -> bool {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by the kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) == 1 }
    }

    /// Inserts the given `mem` into the VM's address space at `guest_addr`.
    ///
    /// The slot that was assigned the kvm memory mapping is returned on success. The slot can be
    /// given to `Vm::remove_memory_region` to remove the memory from the VM's address space and
    /// take back ownership of `mem`.
    ///
    /// Note that memory inserted into the VM's address space must not overlap with any other memory
    /// slot's region.
    ///
    /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
    /// write will trigger a mmio VM exit, leaving the memory untouched.
    ///
    /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
    /// by the guest with `get_dirty_log`.
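    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source); the
    /// guest address used here is arbitrary:
    ///
    /// ```ignore
    /// let mem = MemoryMappingBuilder::new(pagesize()).build().unwrap();
    /// let slot = vm.add_memory_region(GuestAddress(0x1000_0000), Box::new(mem), false, false)?;
    /// // Later, reclaim ownership of the mapping.
    /// let _mem = vm.remove_memory_region(slot)?;
    /// ```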
    pub fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
    ) -> Result<u32> {
        let size = mem.size() as u64;
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as u32,
        };

        // SAFETY:
        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(MemSlot(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    /// Removes memory that was previously added at the given slot.
    ///
    /// Ownership of the host memory mapping associated with the given slot is returned on success.
    pub fn remove_memory_region(&mut self, slot: u32) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY:
        // Safe because the slot is checked against the list of memory slots.
        unsafe {
            set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?;
        }
        self.mem_slot_gaps.lock().push(MemSlot(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
    /// `slot`.
    ///
    /// The size of `dirty_log` must be at least as many bits as there are pages in the memory
    /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
    /// be 2 bytes or greater.
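    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source), sizing
    /// the bitmap with `dirty_log_bitmap_size`; `slot` and `mem_size` are
    /// assumed to come from an earlier `add_memory_region` call:
    ///
    /// ```ignore
    /// let mut dirty_log = vec![0u8; dirty_log_bitmap_size(mem_size)];
    /// vm.get_dirty_log(slot, &mut dirty_log)?;
    /// ```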
    pub fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        match self.mem_regions.lock().get(&slot) {
            Some(mem) => {
                // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
                if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                    return Err(Error::new(EINVAL));
                }
                let mut dirty_log_kvm = kvm_dirty_log {
                    slot,
                    ..Default::default()
                };
                dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
                // SAFETY:
                // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid
                // (because it's from a slice) and we checked that it will be large enough to hold
                // the entire log.
                let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirty_log_kvm) };
                if ret == 0 {
                    Ok(())
                } else {
                    errno_result()
                }
            }
            _ => Err(Error::new(ENOENT)),
        }
    }

    /// Gets a reference to the guest memory owned by this VM.
    ///
    /// Note that `GuestMemory` does not include any mmio memory that may have been added after
    /// this VM was constructed.
    pub fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    /// Sets the address of a one-page region in the VM's address space.
    ///
    /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
    #[cfg(target_arch = "x86_64")]
    pub fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &addr.offset()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the current timestamp of kvmclock as seen by the current guest.
    ///
    /// See the documentation on the KVM_GET_CLOCK ioctl.
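    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source) of
    /// reading the clock and writing it back unchanged, e.g. around a
    /// save/restore cycle:
    ///
    /// ```ignore
    /// let clock = vm.get_clock()?;
    /// vm.set_clock(&clock)?;
    /// ```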
    #[cfg(target_arch = "x86_64")]
    pub fn get_clock(&self) -> Result<kvm_clock_data> {
        // SAFETY: trivially safe
        let mut clock_data = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock_data) };
        if ret == 0 {
            Ok(clock_data)
        } else {
            errno_result()
        }
    }

    /// Sets the current timestamp of kvmclock to the specified value.
    ///
    /// See the documentation on the KVM_SET_CLOCK ioctl.
    #[cfg(target_arch = "x86_64")]
    pub fn set_clock(&self, clock_data: &kvm_clock_data) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), clock_data) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
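    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source); the
    /// irq number is arbitrary:
    ///
    /// ```ignore
    /// vm.create_irq_chip()?;
    /// // With the in-kernel irqchip present, irq lines can be driven directly.
    /// vm.set_irq_line(4, true)?;
    /// vm.set_irq_line(4, false)?;
    /// ```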
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the given interrupt controller by issuing the KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_pic_state(&self, id: PicId) -> Result<kvm_pic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state) };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe as we know that we are retrieving data related to the
                // PIC (primary or secondary) and not the IOAPIC.
                unsafe { irqchip_state.chip.pic },
            )
        } else {
            errno_result()
        }
    }

    /// Sets the state of the given interrupt controller by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_pic_state(&self, id: PicId, state: &kvm_pic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        irqchip_state.chip.pic = *state;
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the IOAPIC by issuing the KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        let ret =
            // SAFETY:
            // Safe because we know our file is a VM fd, we know the kernel will only write the
            // correct amount of memory to our pointer, and we verify the return result.
            unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state) };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe as we know that we are retrieving data related to the
                // IOAPIC and not the PIC.
                unsafe { irqchip_state.chip.ioapic },
            )
        } else {
            errno_result()
        }
    }

    /// Sets the state of the IOAPIC by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        irqchip_state.chip.ioapic = *state;
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn create_pit(&self) -> Result<()> {
        let pit_config = kvm_pit_config::default();
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the PIT by issuing the KVM_GET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
        // SAFETY: trivially safe
        let mut pit_state = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pit_state) };
        if ret == 0 {
            Ok(pit_state)
        } else {
            errno_result()
        }
    }

    /// Sets the state of the PIT by issuing the KVM_SET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pit_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
    /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
    /// and must match the expected size of the guest's write.
    ///
    /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
    /// triggered is prevented.
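    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source); the
    /// port and value are arbitrary:
    ///
    /// ```ignore
    /// let evt = Event::new()?;
    /// // Signal `evt` whenever the guest writes the byte 0x42 to port 0x3f8.
    /// vm.register_ioevent(&evt, IoeventAddress::Pio(0x3f8), Datamatch::U8(Some(0x42)))?;
    /// ```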
    pub fn register_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    /// Unregisters an event previously registered with `register_ioevent`.
    ///
    /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
    /// `register_ioevent`.
    pub fn unregister_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoeventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoeventAddress::Pio(p) => p,
                IoeventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt`
    /// will get triggered when the irqchip is resampled.
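    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source); gsi 1
    /// is arbitrary:
    ///
    /// ```ignore
    /// let evt = Event::new()?;
    /// let resample_evt = Event::new()?;
    /// vm.register_irqfd_resample(&evt, &resample_evt, 1)?;
    /// // ... later, tear it down with the same event and gsi.
    /// vm.unregister_irqfd(&evt, 1)?;
    /// ```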
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn register_irqfd_resample(
        &self,
        evt: &Event,
        resample_evt: &Event,
        gsi: u32,
    ) -> Result<()> {
        let irqfd = kvm_irqfd {
            flags: KVM_IRQFD_FLAG_RESAMPLE,
            fd: evt.as_raw_descriptor() as u32,
            resamplefd: resample_evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Unregisters an event that was previously registered with
    /// `register_irqfd`/`register_irqfd_resample`.
    ///
    /// The `evt` and `gsi` pair must be the same as the ones passed into
    /// `register_irqfd`/`register_irqfd_resample`.
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.
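    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source) routing
    /// gsi 1 to pin 1 of the first in-kernel irqchip:
    ///
    /// ```ignore
    /// let routes = [IrqRoute {
    ///     gsi: 1,
    ///     source: IrqSource::Irqchip { chip: 0, pin: 1 },
    /// }];
    /// vm.set_gsi_routing(&routes)?;
    /// ```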
    #[cfg(target_arch = "x86_64")]
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        // SAFETY:
        // Safe because we ensured there is enough space in irq_routing to hold the number of
        // route entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            irq_route.gsi = route.gsi;
            match route.source {
                IrqSource::Irqchip { chip, pin } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_IRQCHIP;
                    irq_route.u.irqchip = kvm_irq_routing_irqchip { irqchip: chip, pin }
                }
                IrqSource::Msi { address, data } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_MSI;
                    irq_route.u.msi = kvm_irq_routing_msi {
                        address_lo: address as u32,
                        address_hi: (address >> 32) as u32,
                        data,
                        ..Default::default()
                    }
                }
            }
        }

        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Enables the specified capability.
    ///
    /// See the documentation for KVM_ENABLE_CAP.
    ///
    /// # Safety
    /// This function is marked as unsafe because `cap` may contain values which are interpreted as
    /// pointers by the kernel.
    pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }
}

impl AsRawDescriptor for Vm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

/// A reason why a VCPU exited. One of these is returned every time `Vcpu::run` is called.
#[derive(Debug)]
pub enum VcpuExit {
    /// An out port instruction was run on the given port with the given data.
    IoOut {
        port: u16,
        size: usize,
        data: [u8; 8],
    },
    /// An in port instruction was run on the given port.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    IoIn {
        port: u16,
        size: usize,
    },
    /// A read instruction was run against the given MMIO address.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    MmioRead {
        address: u64,
        size: usize,
    },
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite {
        address: u64,
        size: usize,
        data: [u8; 8],
    },
    IoapicEoi {
        vector: u8,
    },
    HypervSynic {
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
    },
    HypervHcall {
        input: u64,
        params: [u64; 2],
    },
    Unknown,
    Exception,
    Hypercall,
    Debug,
    Hlt,
    IrqWindowOpen,
    Shutdown,
    FailEntry {
        hardware_entry_failure_reason: u64,
    },
    Intr,
    SetTpr,
    TprAccess,
    S390Sieic,
    S390Reset,
    Dcr,
    Nmi,
    InternalError,
    Osi,
    PaprHcall,
    S390Ucontrol,
    Watchdog,
    S390Tsch,
    Epr,
    /// The cpu triggered a system level event which is specified by the type field.
    /// The first field is the event type and the second field is flags.
    /// The possible event types are shutdown, reset, or crash. So far there
    /// are not any flags defined.
    SystemEvent(u32 /* event_type */, u64 /* flags */),
}

/// A wrapper around creating and using a VCPU.
/// `Vcpu` provides all functionality except for running. To run, `to_runnable` must be called to
/// lock the vcpu to a thread. Then the returned `RunnableVcpu` can be used for running.
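///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source); it assumes
/// `RunnableVcpu` exposes a `run` method returning a `VcpuExit`, as the doc
/// above describes:
///
/// ```ignore
/// let vcpu = Vcpu::new(0, &kvm, &vm)?;
/// let runnable = vcpu.to_runnable(None)?;
/// loop {
///     match runnable.run()? {
///         VcpuExit::Hlt => break,
///         exit => println!("unhandled exit: {:?}", exit),
///     }
/// }
/// ```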
pub struct Vcpu {
    vcpu: File,
    run_mmap: MemoryMapping,
}

pub struct VcpuThread {
    run: *mut kvm_run,
    signal_num: Option<c_int>,
}

thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = RefCell::new(None));

impl Vcpu {
    /// Constructs a new VCPU for `vm`.
    ///
    /// The `id` argument is the CPU number between [0, max vcpus).
    pub fn new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu> {
        let run_mmap_size = kvm.get_vcpu_mmap_size()?;

        // SAFETY:
        // Safe because we know that vm is a VM fd and we verify the return result.
        let vcpu_fd = unsafe { ioctl_with_val(vm, KVM_CREATE_VCPU(), id) };
        if vcpu_fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
        // the value of the fd and we own the fd.
        let vcpu = unsafe { File::from_raw_descriptor(vcpu_fd) };

        let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(Vcpu { vcpu, run_mmap })
    }

    /// Consumes `self` and returns a `RunnableVcpu`. A `RunnableVcpu` is required to run the
    /// guest.
    ///
    /// Assigns a vcpu to the current thread and stores it in thread-local storage so that signal
    /// handlers can call `set_local_immediate_exit()`. An optional signal number will be
    /// temporarily blocked while assigning the vcpu to the thread and later blocked when
    /// `RunnableVcpu` is destroyed.
    ///
    /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
    #[allow(clippy::cast_ptr_alignment)]
    pub fn to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu> {
        // Block signal while we add -- if a signal fires (very unlikely,
        // as this means something is trying to pause the vcpu before it has
        // even started) it'll try to grab the read lock while this write
        // lock is grabbed and cause a deadlock.
        // Assuming that a failure to block means it's already blocked.
        let _blocked_signal = signal_num.map(BlockedSignal::new);

        VCPU_THREAD.with(|v| {
            if v.borrow().is_none() {
                *v.borrow_mut() = Some(VcpuThread {
                    run: self.run_mmap.as_ptr() as *mut kvm_run,
                    signal_num,
                });
                Ok(())
            } else {
                Err(Error::new(EBUSY))
            }
        })?;

        Ok(RunnableVcpu {
            vcpu: self,
            phantom: Default::default(),
        })
    }

    /// Sets the data received by a mmio read, ioport in, or hypercall instruction.
    ///
    /// This function should be called after `Vcpu::run` returns a `VcpuExit::IoIn`,
    /// `VcpuExit::MmioRead`, or `VcpuExit::HypervHcall`.
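    ///
    /// # Example
    ///
    /// A minimal illustrative sketch (not part of the original source) of
    /// completing an MMIO read with zeroes; it assumes a `RunnableVcpu` that
    /// dereferences to this `Vcpu`:
    ///
    /// ```ignore
    /// match runnable.run()? {
    ///     VcpuExit::MmioRead { address: _, size } => {
    ///         let zeroes = [0u8; 8];
    ///         runnable.set_data(&zeroes[..size])?;
    ///     }
    ///     _ => {}
    /// }
    /// ```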
1104     #[allow(clippy::cast_ptr_alignment)]
set_data(&self, data: &[u8]) -> Result<()>1105     pub fn set_data(&self, data: &[u8]) -> Result<()> {
1106         // SAFETY:
1107         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1108         // kernel told us how large it was. The pointer is page aligned so casting to a different
1109         // type is well defined, hence the clippy allow attribute.
1110         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1111         match run.exit_reason {
1112             KVM_EXIT_IO => {
1113                 let run_start = run as *mut kvm_run as *mut u8;
1114                 // SAFETY:
1115                 // Safe because the exit_reason (which comes from the kernel) told us which
1116                 // union field to use.
1117                 let io = unsafe { run.__bindgen_anon_1.io };
1118                 if io.direction as u32 != KVM_EXIT_IO_IN {
1119                     return Err(Error::new(EINVAL));
1120                 }
1121                 let data_size = (io.count as usize) * (io.size as usize);
1122                 if data_size != data.len() {
1123                     return Err(Error::new(EINVAL));
1124                 }
1125                 // SAFETY:
1126                 // The data_offset is defined by the kernel to be some number of bytes into the
1127                 // kvm_run structure, which we have fully mmap'd.
1128                 unsafe {
1129                     let data_ptr = run_start.offset(io.data_offset as isize);
1130                     copy_nonoverlapping(data.as_ptr(), data_ptr, data_size);
1131                 }
1132                 Ok(())
1133             }
1134             KVM_EXIT_MMIO => {
1135                 // SAFETY:
1136                 // Safe because the exit_reason (which comes from the kernel) told us which
1137                 // union field to use.
1138                 let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
1139                 if mmio.is_write != 0 {
1140                     return Err(Error::new(EINVAL));
1141                 }
1142                 let len = mmio.len as usize;
1143                 if len != data.len() {
1144                     return Err(Error::new(EINVAL));
1145                 }
1146                 mmio.data[..len].copy_from_slice(data);
1147                 Ok(())
1148             }
1149             KVM_EXIT_HYPERV => {
1150                 // SAFETY:
1151                 // Safe because the exit_reason (which comes from the kernel) told us which
1152                 // union field to use.
1153                 let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
1154                 if hyperv.type_ != KVM_EXIT_HYPERV_HCALL {
1155                     return Err(Error::new(EINVAL));
1156                 }
1157                 // TODO(b/315998194): Add safety comment
1158                 #[allow(clippy::undocumented_unsafe_blocks)]
1159                 let hcall = unsafe { &mut hyperv.u.hcall };
1160                 match data.try_into() {
1161                     Ok(data) => {
1162                         hcall.result = u64::from_ne_bytes(data);
1163                     }
1164                     _ => return Err(Error::new(EINVAL)),
1165                 }
1166                 Ok(())
1167             }
1168             _ => Err(Error::new(EINVAL)),
1169         }
1170     }
1171 
1172     /// Sets the bit that requests an immediate exit.
1173     #[allow(clippy::cast_ptr_alignment)]
set_immediate_exit(&self, exit: bool)1174     pub fn set_immediate_exit(&self, exit: bool) {
1175         // SAFETY:
1176         // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1177         // kernel told us how large it was. The pointer is page aligned so casting to a different
1178         // type is well defined, hence the clippy allow attribute.
1179         let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1180         run.immediate_exit = exit.into();
1181     }
1182 
1183     /// Sets/clears the bit for immediate exit for the vcpu on the current thread.
set_local_immediate_exit(exit: bool)1184     pub fn set_local_immediate_exit(exit: bool) {
1185         VCPU_THREAD.with(|v| {
1186             if let Some(state) = &(*v.borrow()) {
1187                 // TODO(b/315998194): Add safety comment
1188                 #[allow(clippy::undocumented_unsafe_blocks)]
1189                 unsafe {
1190                     (*state.run).immediate_exit = exit.into();
1191                 };
1192             }
1193         });
1194     }
1195 
1196     /// Gets the VCPU registers.
1197     #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
get_regs(&self) -> Result<kvm_regs>1198     pub fn get_regs(&self) -> Result<kvm_regs> {
1199         // SAFETY: trivially safe
1200         let mut regs = unsafe { std::mem::zeroed() };
1201         // SAFETY:
1202         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1203         // correct amount of memory from our pointer, and we verify the return result.
1204         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
1205         if ret != 0 {
1206             return errno_result();
1207         }
1208         Ok(regs)
1209     }
1210 
1211     /// Sets the VCPU registers.
1212     #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
set_regs(&self, regs: &kvm_regs) -> Result<()>1213     pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
1214         // SAFETY:
1215         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1216         // correct amount of memory from our pointer, and we verify the return result.
1217         let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) };
1218         if ret != 0 {
1219             return errno_result();
1220         }
1221         Ok(())
1222     }
1223 
1224     /// Gets the VCPU special registers.
1225     #[cfg(target_arch = "x86_64")]
get_sregs(&self) -> Result<kvm_sregs>1226     pub fn get_sregs(&self) -> Result<kvm_sregs> {
1227         // SAFETY: trivially safe
1228         let mut regs = unsafe { std::mem::zeroed() };
1229         // SAFETY:
1230         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1231         // correct amount of memory to our pointer, and we verify the return result.
1232         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
1233         if ret != 0 {
1234             return errno_result();
1235         }
1236         Ok(regs)
1237     }
1238 
1239     /// Sets the VCPU special registers.
1240     #[cfg(target_arch = "x86_64")]
set_sregs(&self, sregs: &kvm_sregs) -> Result<()>1241     pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
1242         // SAFETY:
1243         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1244         // correct amount of memory from our pointer, and we verify the return result.
1245         let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) };
1246         if ret != 0 {
1247             return errno_result();
1248         }
1249         Ok(())
1250     }
1251 
1252     /// Gets the VCPU FPU registers.
1253     #[cfg(target_arch = "x86_64")]
get_fpu(&self) -> Result<kvm_fpu>1254     pub fn get_fpu(&self) -> Result<kvm_fpu> {
1255         // SAFETY: trivially safe
1256         // correct amount of memory to our pointer, and we verify the return result.
1257         let mut regs = unsafe { std::mem::zeroed() };
1258         // SAFETY:
1259         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1260         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut regs) };
1261         if ret != 0 {
1262             return errno_result();
1263         }
1264         Ok(regs)
1265     }
1266 
1267     /// X86 specific call to setup the FPU
1268     ///
1269     /// See the documentation for KVM_SET_FPU.
1270     #[cfg(target_arch = "x86_64")]
set_fpu(&self, fpu: &kvm_fpu) -> Result<()>1271     pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
1272         let ret = {
1273             // SAFETY:
1274             // Here we trust the kernel not to read past the end of the kvm_fpu struct.
1275             unsafe { ioctl_with_ref(self, KVM_SET_FPU(), fpu) }
1276         };
1277         if ret < 0 {
1278             return errno_result();
1279         }
1280         Ok(())
1281     }
1282 
1283     /// Gets the VCPU debug registers.
1284     #[cfg(target_arch = "x86_64")]
get_debugregs(&self) -> Result<kvm_debugregs>1285     pub fn get_debugregs(&self) -> Result<kvm_debugregs> {
1286         // SAFETY: trivially safe
1287         let mut regs = unsafe { std::mem::zeroed() };
1288         // SAFETY:
1289         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1290         // correct amount of memory to our pointer, and we verify the return result.
1291         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut regs) };
1292         if ret != 0 {
1293             return errno_result();
1294         }
1295         Ok(regs)
1296     }
1297 
1298     /// Sets the VCPU debug registers
1299     #[cfg(target_arch = "x86_64")]
set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()>1300     pub fn set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()> {
1301         let ret = {
1302             // SAFETY:
1303             // Here we trust the kernel not to read past the end of the kvm_fpu struct.
1304             unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS(), dregs) }
1305         };
1306         if ret < 0 {
1307             return errno_result();
1308         }
1309         Ok(())
1310     }
1311 
1312     /// Gets the VCPU extended control registers
1313     #[cfg(target_arch = "x86_64")]
get_xcrs(&self) -> Result<kvm_xcrs>1314     pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
1315         // SAFETY: trivially safe
1316         let mut regs = unsafe { std::mem::zeroed() };
1317         // SAFETY:
1318         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1319         // correct amount of memory to our pointer, and we verify the return result.
1320         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut regs) };
1321         if ret != 0 {
1322             return errno_result();
1323         }
1324         Ok(regs)
1325     }
1326 
1327     /// Sets the VCPU extended control registers
    #[cfg(target_arch = "x86_64")]
    pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
            unsafe { ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// X86 specific call to get the MSRs
    ///
    /// See the documentation for KVM_GET_MSRS.
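    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doctest), assuming an already created `vcpu`.
    /// The caller fills in only the `index` fields; the kernel fills in `data`:
    ///
    /// ```ignore
    /// let mut entries = vec![kvm_msr_entry {
    ///     index: 0x174, // IA32_SYSENTER_CS, chosen purely for illustration
    ///     ..Default::default()
    /// }];
    /// vcpu.get_msrs(&mut entries)?;
    /// let value = entries[0].data;
    /// ```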
    #[cfg(target_arch = "x86_64")]
    pub fn get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()> {
        let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(msr_entries.len());
        {
            // SAFETY:
            // Mapping the unsized array to a slice is unsafe because the length isn't known.
            // Providing the length used to create the struct guarantees the entire slice is valid.
            unsafe {
                let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(msr_entries.len());
                entries.copy_from_slice(msr_entries);
            }
        }
        msrs[0].nmsrs = msr_entries.len() as u32;
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
            unsafe { ioctl_with_ref(self, KVM_GET_MSRS(), &msrs[0]) }
        };
        if ret < 0 {
            // KVM_GET_MSRS actually returns the number of msr entries read, so only a negative
            // value indicates an error.
            return errno_result();
        }
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        unsafe {
            let count = ret as usize;
            assert!(count <= msr_entries.len());
            let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(count);
            msr_entries.truncate(count);
            msr_entries.copy_from_slice(entries);
        }
        Ok(())
    }

    /// X86 specific call to set up the MSRs
    ///
    /// See the documentation for KVM_SET_MSRS.
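    ///
    /// # Example
    ///
    /// Because `kvm_msrs` ends in a flexible array, it must be allocated through
    /// `vec_with_array_field`, as in this sketch (not compiled as a doctest; assumes an
    /// already created `vcpu`):
    ///
    /// ```ignore
    /// let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(1);
    /// msrs[0].nmsrs = 1;
    /// // SAFETY (in this sketch): the slice length matches the allocation above.
    /// unsafe {
    ///     msrs[0].entries.as_mut_slice(1)[0] = kvm_msr_entry {
    ///         index: 0x174, // IA32_SYSENTER_CS, purely for illustration
    ///         data: 0,
    ///         ..Default::default()
    ///     };
    /// }
    /// vcpu.set_msrs(&msrs[0])?;
    /// ```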
    #[cfg(target_arch = "x86_64")]
    pub fn set_msrs(&self, msrs: &kvm_msrs) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_msrs struct.
            unsafe { ioctl_with_ref(self, KVM_SET_MSRS(), msrs) }
        };
        if ret < 0 {
            // KVM_SET_MSRS actually returns the number of msr entries written.
            return errno_result();
        }
        Ok(())
    }

    /// X86 specific call to set up the CPUID registers
    ///
    /// See the documentation for KVM_SET_CPUID2.
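    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doctest), assuming an already created `vcpu` and a
    /// `kvm` handle with a `get_supported_cpuid` accessor (assumed here); in practice the
    /// supported entries are usually filtered before being applied:
    ///
    /// ```ignore
    /// let cpuid = kvm.get_supported_cpuid()?;
    /// vcpu.set_cpuid2(&cpuid)?;
    /// ```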
    #[cfg(target_arch = "x86_64")]
    pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
            unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_ptr()) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// X86 specific call to get the system emulated Hyper-V CPUID values.
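    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doctest), assuming an already created `vcpu` and
    /// using the wrapper's `entries_slice` accessor:
    ///
    /// ```ignore
    /// let hv_cpuid = vcpu.get_hyperv_cpuid()?;
    /// for entry in hv_cpuid.entries_slice() {
    ///     // Inspect the synthetic Hyper-V leaves (0x4000_0000..) reported by KVM.
    ///     let _ = entry.function;
    /// }
    /// ```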
    #[cfg(target_arch = "x86_64")]
    pub fn get_hyperv_cpuid(&self) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        let ret = {
            // SAFETY:
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nent, which is set to the allocated
            // size (MAX_KVM_CPUID_ENTRIES) above.
            unsafe { ioctl_with_mut_ptr(self, KVM_GET_SUPPORTED_HV_CPUID(), cpuid.as_mut_ptr()) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(cpuid)
    }

    /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt
    /// Controller".
    ///
    /// See the documentation for KVM_GET_LAPIC.
    #[cfg(target_arch = "x86_64")]
    pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
        let mut klapic: kvm_lapic_state = Default::default();

        let ret = {
            // SAFETY:
            // The ioctl is unsafe unless you trust the kernel not to write past the end of the
            // local_apic struct.
            unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(klapic)
    }

    /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt
    /// Controller".
    ///
    /// See the documentation for KVM_SET_LAPIC.
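    ///
    /// # Example
    ///
    /// A read-modify-write sketch (not compiled as a doctest), assuming an already created
    /// `vcpu` on a VM with an in-kernel irqchip:
    ///
    /// ```ignore
    /// let klapic = vcpu.get_lapic()?;
    /// // klapic.regs is a raw 1 KiB register page laid out per the APIC specification;
    /// // a real caller would edit specific offsets before writing it back.
    /// vcpu.set_lapic(&klapic)?;
    /// ```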
    #[cfg(target_arch = "x86_64")]
    pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the klapic struct.
            unsafe { ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    #[cfg(target_arch = "x86_64")]
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // SAFETY: trivially safe
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only
        // write the correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut state) };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }

    /// Sets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
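    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doctest), assuming an already created `vcpu` whose
    /// VM has an irqchip, and that the `KVM_MP_STATE_RUNNABLE` constant from the kernel
    /// headers is in scope:
    ///
    /// ```ignore
    /// let state = kvm_mp_state {
    ///     mp_state: KVM_MP_STATE_RUNNABLE,
    /// };
    /// vcpu.set_mp_state(&state)?;
    /// ```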
    #[cfg(target_arch = "x86_64")]
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
            unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the vcpu's currently pending exceptions, interrupts, NMIs, etc.
    ///
    /// See the documentation for KVM_GET_VCPU_EVENTS.
    #[cfg(target_arch = "x86_64")]
    pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
        // SAFETY: trivially safe
        let mut events: kvm_vcpu_events = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel
        // will only write the correct amount of memory to our pointer, and we
        // verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut events) };
        if ret < 0 {
            return errno_result();
        }
        Ok(events)
    }

    /// Sets the vcpu's currently pending exceptions, interrupts, NMIs, etc.
    ///
    /// See the documentation for KVM_SET_VCPU_EVENTS.
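    ///
    /// # Example
    ///
    /// A sketch of marking an NMI pending via the events struct (not compiled as a doctest;
    /// assumes an already created `vcpu` and the `KVM_VCPUEVENT_VALID_NMI_PENDING` flag from
    /// the kernel headers):
    ///
    /// ```ignore
    /// let mut events = vcpu.get_vcpu_events()?;
    /// events.nmi.pending = 1;
    /// // Per the KVM API docs, the matching VALID flag must be set for the field to transfer.
    /// events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
    /// vcpu.set_vcpu_events(&events)?;
    /// ```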
    #[cfg(target_arch = "x86_64")]
    pub fn set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()> {
        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the
            // kvm_vcpu_events.
            unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), events) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Enable the specified capability.
    ///
    /// See documentation for KVM_ENABLE_CAP.
    ///
    /// # Safety
    ///
    /// This function is marked as unsafe because `cap` may contain values which are interpreted as
    /// pointers by the kernel.
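    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doctest), assuming an already created `vcpu`.
    /// `KVM_CAP_HYPERV_SYNIC` is used here because it takes no pointer-valued arguments,
    /// which is what makes the call sound:
    ///
    /// ```ignore
    /// let cap = kvm_enable_cap {
    ///     cap: KVM_CAP_HYPERV_SYNIC,
    ///     ..Default::default()
    /// };
    /// // SAFETY (in this sketch): no `args` value is interpreted as a pointer for this cap.
    /// unsafe { vcpu.kvm_enable_cap(&cap)? };
    /// ```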
    pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Specifies the set of signals that are blocked during execution of KVM_RUN.
    /// Signals that are not blocked will cause KVM_RUN to return with -EINTR.
    ///
    /// See the documentation for KVM_SET_SIGNAL_MASK.
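    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doctest), assuming an already created `vcpu`;
    /// crosvm conventionally uses a real-time signal to kick vcpus out of KVM_RUN:
    ///
    /// ```ignore
    /// vcpu.set_signal_mask(&[SIGRTMIN() + 0])?;
    /// ```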
    pub fn set_signal_mask(&self, signals: &[c_int]) -> Result<()> {
        let sigset = signal::create_sigset(signals)?;

        let mut kvm_sigmask = vec_with_array_field::<kvm_signal_mask, sigset_t>(1);
        // The Rust definition of sigset_t is 128 bytes, but the kernel expects only an 8-byte
        // structure, so we cannot write
        // kvm_sigmask.len = size_of::<sigset_t>() as u32;
        kvm_sigmask[0].len = 8;
        // Compile-time check that sigset_t is at least 8 bytes, so the copy below stays in
        // bounds (this underflows, and fails to build, otherwise).
        const _ASSERT: usize = size_of::<sigset_t>() - 8usize;

        // SAFETY:
        // Safe as we allocated exactly the needed space
        unsafe {
            copy_nonoverlapping(
                &sigset as *const sigset_t as *const u8,
                kvm_sigmask[0].sigset.as_mut_ptr(),
                8,
            );
        }

        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the
            // kvm_signal_mask structure.
            unsafe { ioctl_with_ref(self, KVM_SET_SIGNAL_MASK(), &kvm_sigmask[0]) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }


    /// Sets the value of one register on this VCPU. The id of the register is
    /// encoded as specified in the kernel documentation for KVM_SET_ONE_REG.
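    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doctest), assuming an already created aarch64
    /// `vcpu` and a hypothetical register id built per the KVM_SET_ONE_REG encoding rules:
    ///
    /// ```ignore
    /// const REG_ID: u64 = 0x6030_0000_0010_0000; // hypothetical ARM64 core register id
    /// vcpu.set_one_reg(REG_ID, 0)?;
    /// ```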
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    pub fn set_one_reg(&self, reg_id: u64, data: u64) -> Result<()> {
        let data_ref = &data as *const u64;
        let onereg = kvm_one_reg {
            id: reg_id,
            addr: data_ref as u64,
        };
        // SAFETY:
        // safe because we allocated the struct and we know the kernel will read
        // exactly the size of the struct
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}

impl AsRawDescriptor for Vcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

/// A Vcpu that has a thread and can be run. Created by calling `to_runnable` on a `Vcpu`.
/// Implements `Deref` to a `Vcpu` so all `Vcpu` methods are usable, with the addition of the `run`
/// function to execute the guest.
pub struct RunnableVcpu {
    vcpu: Vcpu,
    // vcpus must stay on the same thread once they start.
    // Add the PhantomData pointer to ensure RunnableVcpu is not `Send`.
    phantom: std::marker::PhantomData<*mut u8>,
}

impl RunnableVcpu {
    /// Runs the VCPU until it exits, returning the reason for the exit.
    ///
    /// Note that the state of the VCPU and associated VM must be set up first for this to do
    /// anything useful.
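    ///
    /// # Example
    ///
    /// A sketch of a minimal exit-handling loop (not compiled as a doctest); it assumes the
    /// vcpu was converted with `to_runnable` (taking an optional kick-signal number, per the
    /// struct docs above) and that the guest eventually executes HLT:
    ///
    /// ```ignore
    /// let runnable = vcpu.to_runnable(None)?;
    /// loop {
    ///     match runnable.run()? {
    ///         VcpuExit::Hlt => break,
    ///         VcpuExit::IoOut { port, size, data } => {
    ///             // Forward the PIO write to the device model.
    ///         }
    ///         VcpuExit::MmioRead { address, size } => {
    ///             // Supply data for the MMIO read.
    ///         }
    ///         _ => {} // other exits elided in this sketch
    ///     }
    /// }
    /// ```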
    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
    // allow attribute.
    pub fn run(&self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN()) };
        if ret == 0 {
            // SAFETY:
            // Safe because we know we mapped enough memory to hold the kvm_run struct because the
            // kernel told us how large it was.
            let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
            match run.exit_reason {
                KVM_EXIT_IO => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let io = unsafe { run.__bindgen_anon_1.io };
                    let port = io.port;
                    let size = (io.count as usize) * (io.size as usize);
                    match io.direction as u32 {
                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
                        KVM_EXIT_IO_OUT => {
                            let mut data = [0; 8];
                            let run_start = run as *const kvm_run as *const u8;
                            // SAFETY:
                            // The data_offset is defined by the kernel to be some number of bytes
                            // into the kvm_run structure, which we have fully mmap'd.
                            unsafe {
                                let data_ptr = run_start.offset(io.data_offset as isize);
                                copy_nonoverlapping(
                                    data_ptr,
                                    data.as_mut_ptr(),
                                    min(size, data.len()),
                                );
                            }
                            Ok(VcpuExit::IoOut { port, size, data })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_MMIO => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let mmio = unsafe { &run.__bindgen_anon_1.mmio };
                    let address = mmio.phys_addr;
                    let size = min(mmio.len as usize, mmio.data.len());
                    if mmio.is_write != 0 {
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data: mmio.data,
                        })
                    } else {
                        Ok(VcpuExit::MmioRead { address, size })
                    }
                }
                KVM_EXIT_IOAPIC_EOI => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
                    Ok(VcpuExit::IoapicEoi { vector })
                }
                KVM_EXIT_HYPERV => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hyperv = unsafe { &run.__bindgen_anon_1.hyperv };
                    match hyperv.type_ {
                        KVM_EXIT_HYPERV_SYNIC => {
                            // TODO(b/315998194): Add safety comment
                            #[allow(clippy::undocumented_unsafe_blocks)]
                            let synic = unsafe { &hyperv.u.synic };
                            Ok(VcpuExit::HypervSynic {
                                msr: synic.msr,
                                control: synic.control,
                                evt_page: synic.evt_page,
                                msg_page: synic.msg_page,
                            })
                        }
                        KVM_EXIT_HYPERV_HCALL => {
                            // TODO(b/315998194): Add safety comment
                            #[allow(clippy::undocumented_unsafe_blocks)]
                            let hcall = unsafe { &hyperv.u.hcall };
                            Ok(VcpuExit::HypervHcall {
                                input: hcall.input,
                                params: hcall.params,
                            })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
                KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
                KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
                KVM_EXIT_FAIL_ENTRY => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hardware_entry_failure_reason = unsafe {
                        run.__bindgen_anon_1
                            .fail_entry
                            .hardware_entry_failure_reason
                    };
                    Ok(VcpuExit::FailEntry {
                        hardware_entry_failure_reason,
                    })
                }
                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
                KVM_EXIT_SYSTEM_EVENT => {
                    let event_type = {
                        // SAFETY:
                        // Safe because we know the exit reason told us this union
                        // field is valid
                        unsafe { run.__bindgen_anon_1.system_event.type_ }
                    };
                    // TODO(b/315998194): Add safety comment
                    #[allow(clippy::undocumented_unsafe_blocks)]
                    let event_flags =
                        unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                    Ok(VcpuExit::SystemEvent(event_type, event_flags))
                }
                r => panic!("unknown kvm exit reason: {}", r),
            }
        } else {
            errno_result()
        }
    }
}

impl Deref for RunnableVcpu {
    type Target = Vcpu;
    fn deref(&self) -> &Self::Target {
        &self.vcpu
    }
}

impl DerefMut for RunnableVcpu {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.vcpu
    }
}

impl AsRawDescriptor for RunnableVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

impl Drop for RunnableVcpu {
    fn drop(&mut self) {
        VCPU_THREAD.with(|v| {
            // This assumes that a failure in `BlockedSignal::new` means the signal is already
            // blocked and therefore it should not be unblocked on exit.
            let _blocked_signal = &(*v.borrow())
                .as_ref()
                .and_then(|state| state.signal_num)
                .map(BlockedSignal::new);

            *v.borrow_mut() = None;
        });
    }
}

/// Wrapper for kvm_cpuid2 which has a zero length array at the end.
/// Hides the zero length array behind a bounds check.
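///
/// # Example
///
/// A minimal sketch (not compiled as a doctest) of building a one-entry `CpuId` with the
/// wrapper's slice accessors (`mut_entries_slice` is assumed from `FlexibleArrayWrapper`):
///
/// ```ignore
/// let mut cpuid = CpuId::new(1);
/// let entries = cpuid.mut_entries_slice();
/// entries[0].function = 0x4000_0000; // hypothetical paravirt leaf, purely for illustration
/// entries[0].eax = 0;
/// ```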
#[cfg(target_arch = "x86_64")]
pub type CpuId = FlexibleArrayWrapper<kvm_cpuid2, kvm_cpuid_entry2>;