1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! A safe wrapper around the kernel's KVM interface.
6 //!
7 //! New code should use the `hypervisor` crate instead.
8
9 #![cfg(any(target_os = "android", target_os = "linux"))]
10
11 mod cap;
12
13 use std::cell::RefCell;
14 use std::cmp::min;
15 use std::cmp::Ordering;
16 use std::collections::BTreeMap;
17 use std::collections::BinaryHeap;
18 use std::ffi::CString;
19 use std::fs::File;
20 use std::mem::size_of;
21 use std::ops::Deref;
22 use std::ops::DerefMut;
23 use std::os::raw::*;
24 use std::os::unix::prelude::OsStrExt;
25 use std::path::Path;
26 use std::path::PathBuf;
27 use std::ptr::copy_nonoverlapping;
28 use std::sync::Arc;
29
30 #[allow(unused_imports)]
31 use base::ioctl;
32 #[allow(unused_imports)]
33 use base::ioctl_with_mut_ptr;
34 #[allow(unused_imports)]
35 use base::ioctl_with_mut_ref;
36 #[allow(unused_imports)]
37 use base::ioctl_with_ptr;
38 #[allow(unused_imports)]
39 use base::ioctl_with_ref;
40 #[allow(unused_imports)]
41 use base::ioctl_with_val;
42 #[allow(unused_imports)]
43 use base::pagesize;
44 #[allow(unused_imports)]
45 use base::signal;
46 use base::sys::BlockedSignal;
47 #[allow(unused_imports)]
48 use base::unblock_signal;
49 #[allow(unused_imports)]
50 use base::warn;
51 use base::AsRawDescriptor;
52 #[allow(unused_imports)]
53 use base::Error;
54 #[allow(unused_imports)]
55 use base::Event;
56 use base::FromRawDescriptor;
57 #[allow(unused_imports)]
58 use base::IoctlNr;
59 #[allow(unused_imports)]
60 use base::MappedRegion;
61 #[allow(unused_imports)]
62 use base::MemoryMapping;
63 #[allow(unused_imports)]
64 use base::MemoryMappingBuilder;
65 #[allow(unused_imports)]
66 use base::MmapError;
67 use base::RawDescriptor;
68 #[allow(unused_imports)]
69 use base::Result;
70 #[allow(unused_imports)]
71 use base::SIGRTMIN;
72 use data_model::vec_with_array_field;
73 #[cfg(target_arch = "x86_64")]
74 use data_model::FlexibleArrayWrapper;
75 use kvm_sys::*;
76 use libc::open64;
77 use libc::sigset_t;
78 use libc::EBUSY;
79 use libc::EINVAL;
80 use libc::ENOENT;
81 use libc::ENOSPC;
82 use libc::EOVERFLOW;
83 use libc::O_CLOEXEC;
84 use libc::O_RDWR;
85 use sync::Mutex;
86 use vm_memory::GuestAddress;
87 use vm_memory::GuestMemory;
88
89 pub use crate::cap::*;
90
/// Returns an `Err` wrapping the last OS error (`Error::last()`); used after a failed
/// syscall or ioctl to propagate `errno` to the caller.
fn errno_result<T>() -> Result<T> {
    Err(Error::last())
}
94
set_user_memory_region<F: AsRawDescriptor>( fd: &F, slot: u32, read_only: bool, log_dirty_pages: bool, guest_addr: u64, memory_size: u64, userspace_addr: *mut u8, ) -> Result<()>95 unsafe fn set_user_memory_region<F: AsRawDescriptor>(
96 fd: &F,
97 slot: u32,
98 read_only: bool,
99 log_dirty_pages: bool,
100 guest_addr: u64,
101 memory_size: u64,
102 userspace_addr: *mut u8,
103 ) -> Result<()> {
104 let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
105 if log_dirty_pages {
106 flags |= KVM_MEM_LOG_DIRTY_PAGES;
107 }
108 let region = kvm_userspace_memory_region {
109 slot,
110 flags,
111 guest_phys_addr: guest_addr,
112 memory_size,
113 userspace_addr: userspace_addr as u64,
114 };
115
116 let ret = ioctl_with_ref(fd, KVM_SET_USER_MEMORY_REGION(), ®ion);
117 if ret == 0 {
118 Ok(())
119 } else {
120 errno_result()
121 }
122 }
123
124 /// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
125 /// size.
126 ///
127 /// # Arguments
128 ///
129 /// * `size` - Number of bytes in the memory region being queried.
dirty_log_bitmap_size(size: usize) -> usize130 pub fn dirty_log_bitmap_size(size: usize) -> usize {
131 let page_size = pagesize();
132 (((size + page_size - 1) / page_size) + 7) / 8
133 }
134
/// A wrapper around opening and using `/dev/kvm`.
///
/// Useful for querying extensions and basic values from the KVM backend. A `Kvm` is required to
/// create a `Vm` object.
pub struct Kvm {
    /// The open `/dev/kvm` (or alternate path) device file.
    kvm: File,
}
142
impl Kvm {
    /// Opens `/dev/kvm` and returns a Kvm object on success.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(&PathBuf::from("/dev/kvm"))
    }

    /// Opens a KVM device at `device_path` and returns a Kvm object on success.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        Ok(Kvm {
            kvm: {
                // SAFETY:
                // Safe because we verify that ret is valid and we own the fd.
                unsafe { File::from_raw_descriptor(ret) }
            },
        })
    }

    /// Returns the raw integer result of KVM_CHECK_EXTENSION for capability `c`.
    fn check_extension_int(&self, c: Cap) -> i32 {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) }
    }

    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.check_extension_int(c) == 1
    }

    /// Gets the size of the mmap required to use vcpu's `kvm_run` structure.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) };
        if res > 0 {
            Ok(res as usize)
        } else {
            errno_result()
        }
    }

    /// Issues the CPUID-list ioctl selected by `kind` (supported or emulated CPUID) with
    /// room for up to 256 entries, returning the populated `CpuId` on success.
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nent, which is set to the allocated
        // size(MAX_KVM_CPUID_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ptr(self, kind, cpuid.as_mut_ptr()) };
        if ret < 0 {
            return errno_result();
        }

        Ok(cpuid)
    }

    /// X86 specific call to get the system supported CPUID values
    #[cfg(target_arch = "x86_64")]
    pub fn get_supported_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_SUPPORTED_CPUID())
    }

    /// X86 specific call to get the system emulated CPUID values
    #[cfg(target_arch = "x86_64")]
    pub fn get_emulated_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_EMULATED_CPUID())
    }

    /// X86 specific call to get list of supported MSRS
    ///
    /// See the documentation for KVM_GET_MSR_INDEX_LIST.
    #[cfg(target_arch = "x86_64")]
    pub fn get_msr_index_list(&self) -> Result<Vec<u32>> {
        const MAX_KVM_MSR_ENTRIES: usize = 256;

        let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
        msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nmsrs, which is set to the allocated
        // size (MAX_KVM_MSR_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST(), &mut msr_list[0]) };
        if ret < 0 {
            return errno_result();
        }

        let mut nmsrs = msr_list[0].nmsrs;

        // SAFETY:
        // Mapping the unsized array to a slice is unsafe because the length isn't known. Using
        // the length we originally allocated with eliminates the possibility of overflow.
        let indices: &[u32] = unsafe {
            // Clamp the kernel-reported count to our allocation, just in case.
            if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
                nmsrs = MAX_KVM_MSR_ENTRIES as u32;
            }
            msr_list[0].indices.as_slice(nmsrs as usize)
        };

        Ok(indices.to_vec())
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
    // The x86 and riscv machine type is always 0
    pub fn get_vm_type(&self) -> c_ulong {
        0
    }

    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    // Compute the machine type, which should be the IPA range for the VM
    // Ideally, this would take a description of the memory map and return
    // the closest machine type for this VM. Here, we just return the maximum
    // the kernel supports.
    #[allow(clippy::useless_conversion)]
    pub fn get_vm_type(&self) -> c_ulong {
        // SAFETY:
        // Safe because we know self is a real kvm fd
        match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), KVM_CAP_ARM_VM_IPA_SIZE.into()) }
        {
            // Not supported? Use 0 as the machine type, which implies 40bit IPA
            ret if ret < 0 => 0,
            // Use the lower 8 bits representing the IPA space as the machine type
            ipa => (ipa & 0xff) as c_ulong,
        }
    }
}
278
impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        // Expose the underlying KVM device fd so the ioctl helpers can target it.
        self.kvm.as_raw_descriptor()
    }
}
284
/// An address either in programmable I/O space or in memory mapped I/O space.
#[derive(Copy, Clone, Debug)]
pub enum IoeventAddress {
    /// A port address in programmable I/O space.
    Pio(u64),
    /// An address in memory mapped I/O space.
    Mmio(u64),
}
291
/// Used in `Vm::register_ioevent` to indicate a size and optionally value to match.
///
/// Each sized variant carries `Some(value)` to trigger only on writes of that exact value,
/// or `None` to trigger on any write of that size.
pub enum Datamatch {
    /// Match a write of any length and any value.
    AnyLength,
    /// Match 1-byte writes, optionally of a specific value.
    U8(Option<u8>),
    /// Match 2-byte writes, optionally of a specific value.
    U16(Option<u16>),
    /// Match 4-byte writes, optionally of a specific value.
    U32(Option<u32>),
    /// Match 8-byte writes, optionally of a specific value.
    U64(Option<u64>),
}
300
/// A source of IRQs in an `IrqRoute`.
pub enum IrqSource {
    /// An IRQ delivered through a pin of an emulated interrupt controller chip.
    Irqchip { chip: u32, pin: u32 },
    /// A message-signaled interrupt with its target address and payload data.
    Msi { address: u64, data: u32 },
}
306
/// A single route for an IRQ.
pub struct IrqRoute {
    /// The global system interrupt number being routed.
    pub gsi: u32,
    /// Where the interrupt comes from (irqchip pin or MSI).
    pub source: IrqSource,
}
312
/// Interrupt controller IDs
pub enum PicId {
    /// The primary PIC (chip id 0).
    Primary = 0,
    /// The secondary PIC (chip id 1).
    Secondary = 1,
}
318
/// Number of pins on the emulated IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;
321
// Wrapper around a KVM memory slot number. Used to invert the order when stored in a
// max-heap, so that popping a `BinaryHeap<MemSlot>` yields the smallest free slot first.
#[derive(Copy, Clone, Eq, PartialEq)]
struct MemSlot(u32);
325
326 impl Ord for MemSlot {
cmp(&self, other: &MemSlot) -> Ordering327 fn cmp(&self, other: &MemSlot) -> Ordering {
328 // Notice the order is inverted so the lowest magnitude slot has the highest priority in a
329 // max-heap.
330 other.0.cmp(&self.0)
331 }
332 }
333
impl PartialOrd for MemSlot {
    fn partial_cmp(&self, other: &MemSlot) -> Option<Ordering> {
        // Delegate to the (inverted) total order defined by `Ord`.
        Some(self.cmp(other))
    }
}
339
/// A wrapper around creating and using a VM.
pub struct Vm {
    /// The VM fd returned by KVM_CREATE_VM.
    vm: File,
    /// The guest memory registered at construction time; its regions occupy the first slots.
    guest_mem: GuestMemory,
    /// Memory regions added after construction, keyed by their KVM slot number.
    mem_regions: Arc<Mutex<BTreeMap<u32, Box<dyn MappedRegion>>>>,
    /// Slot numbers freed by `remove_memory_region`, reused (lowest first, thanks to
    /// `MemSlot`'s inverted ordering) before new slot numbers are allocated.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<MemSlot>>>,
}
347
348 impl Vm {
349 /// Constructs a new `Vm` using the given `Kvm` instance.
new(kvm: &Kvm, guest_mem: GuestMemory) -> Result<Vm>350 pub fn new(kvm: &Kvm, guest_mem: GuestMemory) -> Result<Vm> {
351 // SAFETY:
352 // Safe because we know kvm is a real kvm fd as this module is the only one that can make
353 // Kvm objects.
354 let ret = unsafe { ioctl_with_val(kvm, KVM_CREATE_VM(), kvm.get_vm_type()) };
355 if ret >= 0 {
356 // SAFETY:
357 // Safe because we verify the value of ret and we are the owners of the fd.
358 let vm_file = unsafe { File::from_raw_descriptor(ret) };
359 for region in guest_mem.regions() {
360 // SAFETY:
361 // Safe because the guest regions are guaranteed not to overlap.
362 unsafe {
363 set_user_memory_region(
364 &vm_file,
365 region.index as u32,
366 false,
367 false,
368 region.guest_addr.offset(),
369 region.size as u64,
370 region.host_addr as *mut u8,
371 )
372 }?;
373 }
374
375 Ok(Vm {
376 vm: vm_file,
377 guest_mem,
378 mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
379 mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
380 })
381 } else {
382 errno_result()
383 }
384 }
385
386 /// Checks if a particular `Cap` is available.
387 ///
388 /// This is distinct from the `Kvm` version of this method because the some extensions depend on
389 /// the particular `Vm` existence. This method is encouraged by the kernel because it more
390 /// accurately reflects the usable capabilities.
check_extension(&self, c: Cap) -> bool391 pub fn check_extension(&self, c: Cap) -> bool {
392 // SAFETY:
393 // Safe because we know that our file is a KVM fd and that the extension is one of the ones
394 // defined by kernel.
395 unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) == 1 }
396 }
397
398 /// Inserts the given `mem` into the VM's address space at `guest_addr`.
399 ///
400 /// The slot that was assigned the kvm memory mapping is returned on success. The slot can be
401 /// given to `Vm::remove_memory_region` to remove the memory from the VM's address space and
402 /// take back ownership of `mem`.
403 ///
404 /// Note that memory inserted into the VM's address space must not overlap with any other memory
405 /// slot's region.
406 ///
407 /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
408 /// write will trigger a mmio VM exit, leaving the memory untouched.
409 ///
410 /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
411 /// by the guest with `get_dirty_log`.
add_memory_region( &mut self, guest_addr: GuestAddress, mem: Box<dyn MappedRegion>, read_only: bool, log_dirty_pages: bool, ) -> Result<u32>412 pub fn add_memory_region(
413 &mut self,
414 guest_addr: GuestAddress,
415 mem: Box<dyn MappedRegion>,
416 read_only: bool,
417 log_dirty_pages: bool,
418 ) -> Result<u32> {
419 let size = mem.size() as u64;
420 let end_addr = guest_addr
421 .checked_add(size)
422 .ok_or_else(|| Error::new(EOVERFLOW))?;
423 if self.guest_mem.range_overlap(guest_addr, end_addr) {
424 return Err(Error::new(ENOSPC));
425 }
426 let mut regions = self.mem_regions.lock();
427 let mut gaps = self.mem_slot_gaps.lock();
428 let slot = match gaps.pop() {
429 Some(gap) => gap.0,
430 None => (regions.len() + self.guest_mem.num_regions() as usize) as u32,
431 };
432
433 // SAFETY:
434 // Safe because we check that the given guest address is valid and has no overlaps. We also
435 // know that the pointer and size are correct because the MemoryMapping interface ensures
436 // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
437 // is removed.
438 let res = unsafe {
439 set_user_memory_region(
440 &self.vm,
441 slot,
442 read_only,
443 log_dirty_pages,
444 guest_addr.offset(),
445 size,
446 mem.as_ptr(),
447 )
448 };
449
450 if let Err(e) = res {
451 gaps.push(MemSlot(slot));
452 return Err(e);
453 }
454 regions.insert(slot, mem);
455 Ok(slot)
456 }
457
458 /// Removes memory that was previously added at the given slot.
459 ///
460 /// Ownership of the host memory mapping associated with the given slot is returned on success.
remove_memory_region(&mut self, slot: u32) -> Result<Box<dyn MappedRegion>>461 pub fn remove_memory_region(&mut self, slot: u32) -> Result<Box<dyn MappedRegion>> {
462 let mut regions = self.mem_regions.lock();
463 if !regions.contains_key(&slot) {
464 return Err(Error::new(ENOENT));
465 }
466 // SAFETY:
467 // Safe because the slot is checked against the list of memory slots.
468 unsafe {
469 set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?;
470 }
471 self.mem_slot_gaps.lock().push(MemSlot(slot));
472 // This remove will always succeed because of the contains_key check above.
473 Ok(regions.remove(&slot).unwrap())
474 }
475
476 /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
477 /// `slot`.
478 ///
479 /// The size of `dirty_log` must be at least as many bits as there are pages in the memory
480 /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
481 /// be 2 bytes or greater.
get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()>482 pub fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
483 match self.mem_regions.lock().get(&slot) {
484 Some(mem) => {
485 // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
486 if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
487 return Err(Error::new(EINVAL));
488 }
489 let mut dirty_log_kvm = kvm_dirty_log {
490 slot,
491 ..Default::default()
492 };
493 dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
494 // SAFETY:
495 // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid
496 // (because it's from a slice) and we checked that it will be large enough to hold
497 // the entire log.
498 let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirty_log_kvm) };
499 if ret == 0 {
500 Ok(())
501 } else {
502 errno_result()
503 }
504 }
505 _ => Err(Error::new(ENOENT)),
506 }
507 }
508
509 /// Gets a reference to the guest memory owned by this VM.
510 ///
511 /// Note that `GuestMemory` does not include any mmio memory that may have been added after
512 /// this VM was constructed.
get_memory(&self) -> &GuestMemory513 pub fn get_memory(&self) -> &GuestMemory {
514 &self.guest_mem
515 }
516
517 /// Sets the address of a one-page region in the VM's address space.
518 ///
519 /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
520 #[cfg(target_arch = "x86_64")]
set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>521 pub fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
522 // SAFETY:
523 // Safe because we know that our file is a VM fd and we verify the return result.
524 let ret = unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &addr.offset()) };
525 if ret == 0 {
526 Ok(())
527 } else {
528 errno_result()
529 }
530 }
531
532 /// Retrieves the current timestamp of kvmclock as seen by the current guest.
533 ///
534 /// See the documentation on the KVM_GET_CLOCK ioctl.
535 #[cfg(target_arch = "x86_64")]
get_clock(&self) -> Result<kvm_clock_data>536 pub fn get_clock(&self) -> Result<kvm_clock_data> {
537 // SAFETY: trivially safe
538 let mut clock_data = unsafe { std::mem::zeroed() };
539 // SAFETY:
540 // Safe because we know that our file is a VM fd, we know the kernel will only write
541 // correct amount of memory to our pointer, and we verify the return result.
542 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock_data) };
543 if ret == 0 {
544 Ok(clock_data)
545 } else {
546 errno_result()
547 }
548 }
549
550 /// Sets the current timestamp of kvmclock to the specified value.
551 ///
552 /// See the documentation on the KVM_SET_CLOCK ioctl.
553 #[cfg(target_arch = "x86_64")]
set_clock(&self, clock_data: &kvm_clock_data) -> Result<()>554 pub fn set_clock(&self, clock_data: &kvm_clock_data) -> Result<()> {
555 // SAFETY:
556 // Safe because we know that our file is a VM fd, we know the kernel will only read
557 // correct amount of memory from our pointer, and we verify the return result.
558 let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), clock_data) };
559 if ret == 0 {
560 Ok(())
561 } else {
562 errno_result()
563 }
564 }
565
566 /// Crates an in kernel interrupt controller.
567 ///
568 /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
569 #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
create_irq_chip(&self) -> Result<()>570 pub fn create_irq_chip(&self) -> Result<()> {
571 // SAFETY:
572 // Safe because we know that our file is a VM fd and we verify the return result.
573 let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) };
574 if ret == 0 {
575 Ok(())
576 } else {
577 errno_result()
578 }
579 }
580
581 /// Retrieves the state of given interrupt controller by issuing KVM_GET_IRQCHIP ioctl.
582 ///
583 /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
584 #[cfg(target_arch = "x86_64")]
get_pic_state(&self, id: PicId) -> Result<kvm_pic_state>585 pub fn get_pic_state(&self, id: PicId) -> Result<kvm_pic_state> {
586 let mut irqchip_state = kvm_irqchip {
587 chip_id: id as u32,
588 ..Default::default()
589 };
590 // SAFETY:
591 // Safe because we know our file is a VM fd, we know the kernel will only write
592 // correct amount of memory to our pointer, and we verify the return result.
593 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state) };
594 if ret == 0 {
595 Ok(
596 // SAFETY:
597 // Safe as we know that we are retrieving data related to the
598 // PIC (primary or secondary) and not IOAPIC.
599 unsafe { irqchip_state.chip.pic },
600 )
601 } else {
602 errno_result()
603 }
604 }
605
606 /// Sets the state of given interrupt controller by issuing KVM_SET_IRQCHIP ioctl.
607 ///
608 /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
609 #[cfg(target_arch = "x86_64")]
set_pic_state(&self, id: PicId, state: &kvm_pic_state) -> Result<()>610 pub fn set_pic_state(&self, id: PicId, state: &kvm_pic_state) -> Result<()> {
611 let mut irqchip_state = kvm_irqchip {
612 chip_id: id as u32,
613 ..Default::default()
614 };
615 irqchip_state.chip.pic = *state;
616 // SAFETY:
617 // Safe because we know that our file is a VM fd, we know the kernel will only read
618 // correct amount of memory from our pointer, and we verify the return result.
619 let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
620 if ret == 0 {
621 Ok(())
622 } else {
623 errno_result()
624 }
625 }
626
627 /// Retrieves the state of IOAPIC by issuing KVM_GET_IRQCHIP ioctl.
628 ///
629 /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
630 #[cfg(target_arch = "x86_64")]
get_ioapic_state(&self) -> Result<kvm_ioapic_state>631 pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
632 let mut irqchip_state = kvm_irqchip {
633 chip_id: 2,
634 ..Default::default()
635 };
636 let ret =
637 // SAFETY:
638 // Safe because we know our file is a VM fd, we know the kernel will only write
639 // correct amount of memory to our pointer, and we verify the return result.
640 unsafe {
641 ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
642 };
643 if ret == 0 {
644 Ok(
645 // SAFETY:
646 // Safe as we know that we are retrieving data related to the
647 // IOAPIC and not PIC.
648 unsafe { irqchip_state.chip.ioapic },
649 )
650 } else {
651 errno_result()
652 }
653 }
654
655 /// Sets the state of IOAPIC by issuing KVM_SET_IRQCHIP ioctl.
656 ///
657 /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
658 #[cfg(target_arch = "x86_64")]
set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()>659 pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
660 let mut irqchip_state = kvm_irqchip {
661 chip_id: 2,
662 ..Default::default()
663 };
664 irqchip_state.chip.ioapic = *state;
665 // SAFETY:
666 // Safe because we know that our file is a VM fd, we know the kernel will only read
667 // correct amount of memory from our pointer, and we verify the return result.
668 let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
669 if ret == 0 {
670 Ok(())
671 } else {
672 errno_result()
673 }
674 }
675
676 /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
677 #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
set_irq_line(&self, irq: u32, active: bool) -> Result<()>678 pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
679 let mut irq_level = kvm_irq_level::default();
680 irq_level.__bindgen_anon_1.irq = irq;
681 irq_level.level = active.into();
682
683 // SAFETY:
684 // Safe because we know that our file is a VM fd, we know the kernel will only read the
685 // correct amount of memory from our pointer, and we verify the return result.
686 let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
687 if ret == 0 {
688 Ok(())
689 } else {
690 errno_result()
691 }
692 }
693
694 /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
695 ///
696 /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
697 #[cfg(target_arch = "x86_64")]
create_pit(&self) -> Result<()>698 pub fn create_pit(&self) -> Result<()> {
699 let pit_config = kvm_pit_config::default();
700 // SAFETY:
701 // Safe because we know that our file is a VM fd, we know the kernel will only read the
702 // correct amount of memory from our pointer, and we verify the return result.
703 let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) };
704 if ret == 0 {
705 Ok(())
706 } else {
707 errno_result()
708 }
709 }
710
711 /// Retrieves the state of PIT by issuing KVM_GET_PIT2 ioctl.
712 ///
713 /// Note that this call can only succeed after a call to `Vm::create_pit`.
714 #[cfg(target_arch = "x86_64")]
get_pit_state(&self) -> Result<kvm_pit_state2>715 pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
716 // SAFETY: trivially safe
717 let mut pit_state = unsafe { std::mem::zeroed() };
718 // SAFETY:
719 // Safe because we know that our file is a VM fd, we know the kernel will only write
720 // correct amount of memory to our pointer, and we verify the return result.
721 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pit_state) };
722 if ret == 0 {
723 Ok(pit_state)
724 } else {
725 errno_result()
726 }
727 }
728
729 /// Sets the state of PIT by issuing KVM_SET_PIT2 ioctl.
730 ///
731 /// Note that this call can only succeed after a call to `Vm::create_pit`.
732 #[cfg(target_arch = "x86_64")]
set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()>733 pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
734 // SAFETY:
735 // Safe because we know that our file is a VM fd, we know the kernel will only read
736 // correct amount of memory from our pointer, and we verify the return result.
737 let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pit_state) };
738 if ret == 0 {
739 Ok(())
740 } else {
741 errno_result()
742 }
743 }
744
745 /// Registers an event to be signaled whenever a certain address is written to.
746 ///
747 /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
748 /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
749 /// and must match the expected size of the guest's write.
750 ///
751 /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
752 /// triggered is prevented.
register_ioevent( &self, evt: &Event, addr: IoeventAddress, datamatch: Datamatch, ) -> Result<()>753 pub fn register_ioevent(
754 &self,
755 evt: &Event,
756 addr: IoeventAddress,
757 datamatch: Datamatch,
758 ) -> Result<()> {
759 self.ioeventfd(evt, addr, datamatch, false)
760 }
761
762 /// Unregisters an event previously registered with `register_ioevent`.
763 ///
764 /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
765 /// `register_ioevent`.
unregister_ioevent( &self, evt: &Event, addr: IoeventAddress, datamatch: Datamatch, ) -> Result<()>766 pub fn unregister_ioevent(
767 &self,
768 evt: &Event,
769 addr: IoeventAddress,
770 datamatch: Datamatch,
771 ) -> Result<()> {
772 self.ioeventfd(evt, addr, datamatch, true)
773 }
774
ioeventfd( &self, evt: &Event, addr: IoeventAddress, datamatch: Datamatch, deassign: bool, ) -> Result<()>775 fn ioeventfd(
776 &self,
777 evt: &Event,
778 addr: IoeventAddress,
779 datamatch: Datamatch,
780 deassign: bool,
781 ) -> Result<()> {
782 let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
783 Datamatch::AnyLength => (false, 0, 0),
784 Datamatch::U8(v) => match v {
785 Some(u) => (true, u as u64, 1),
786 None => (false, 0, 1),
787 },
788 Datamatch::U16(v) => match v {
789 Some(u) => (true, u as u64, 2),
790 None => (false, 0, 2),
791 },
792 Datamatch::U32(v) => match v {
793 Some(u) => (true, u as u64, 4),
794 None => (false, 0, 4),
795 },
796 Datamatch::U64(v) => match v {
797 Some(u) => (true, u, 8),
798 None => (false, 0, 8),
799 },
800 };
801 let mut flags = 0;
802 if deassign {
803 flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
804 }
805 if do_datamatch {
806 flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
807 }
808 if let IoeventAddress::Pio(_) = addr {
809 flags |= 1 << kvm_ioeventfd_flag_nr_pio;
810 }
811 let ioeventfd = kvm_ioeventfd {
812 datamatch: datamatch_value,
813 len: datamatch_len,
814 addr: match addr {
815 IoeventAddress::Pio(p) => p,
816 IoeventAddress::Mmio(m) => m,
817 },
818 fd: evt.as_raw_descriptor(),
819 flags,
820 ..Default::default()
821 };
822 // SAFETY:
823 // Safe because we know that our file is a VM fd, we know the kernel will only read the
824 // correct amount of memory from our pointer, and we verify the return result.
825 let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
826 if ret == 0 {
827 Ok(())
828 } else {
829 errno_result()
830 }
831 }
832
833 /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt` will
834 /// get triggered when the irqchip is resampled.
835 #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
register_irqfd_resample( &self, evt: &Event, resample_evt: &Event, gsi: u32, ) -> Result<()>836 pub fn register_irqfd_resample(
837 &self,
838 evt: &Event,
839 resample_evt: &Event,
840 gsi: u32,
841 ) -> Result<()> {
842 let irqfd = kvm_irqfd {
843 flags: KVM_IRQFD_FLAG_RESAMPLE,
844 fd: evt.as_raw_descriptor() as u32,
845 resamplefd: resample_evt.as_raw_descriptor() as u32,
846 gsi,
847 ..Default::default()
848 };
849 // SAFETY:
850 // Safe because we know that our file is a VM fd, we know the kernel will only read the
851 // correct amount of memory from our pointer, and we verify the return result.
852 let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
853 if ret == 0 {
854 Ok(())
855 } else {
856 errno_result()
857 }
858 }
859
860 /// Unregisters an event that was previously registered with
861 /// `register_irqfd`/`register_irqfd_resample`.
862 ///
863 /// The `evt` and `gsi` pair must be the same as the ones passed into
864 /// `register_irqfd`/`register_irqfd_resample`.
865 #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()>866 pub fn unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()> {
867 let irqfd = kvm_irqfd {
868 fd: evt.as_raw_descriptor() as u32,
869 gsi,
870 flags: KVM_IRQFD_FLAG_DEASSIGN,
871 ..Default::default()
872 };
873 // SAFETY:
874 // Safe because we know that our file is a VM fd, we know the kernel will only read the
875 // correct amount of memory from our pointer, and we verify the return result.
876 let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
877 if ret == 0 {
878 Ok(())
879 } else {
880 errno_result()
881 }
882 }
883
884 /// Sets the GSI routing table, replacing any table set with previous calls to
885 /// `set_gsi_routing`.
886 #[cfg(target_arch = "x86_64")]
set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()>887 pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
888 let mut irq_routing =
889 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
890 irq_routing[0].nr = routes.len() as u32;
891
892 // SAFETY:
893 // Safe because we ensured there is enough space in irq_routing to hold the number of
894 // route entries.
895 let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
896 for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
897 irq_route.gsi = route.gsi;
898 match route.source {
899 IrqSource::Irqchip { chip, pin } => {
900 irq_route.type_ = KVM_IRQ_ROUTING_IRQCHIP;
901 irq_route.u.irqchip = kvm_irq_routing_irqchip { irqchip: chip, pin }
902 }
903 IrqSource::Msi { address, data } => {
904 irq_route.type_ = KVM_IRQ_ROUTING_MSI;
905 irq_route.u.msi = kvm_irq_routing_msi {
906 address_lo: address as u32,
907 address_hi: (address >> 32) as u32,
908 data,
909 ..Default::default()
910 }
911 }
912 }
913 }
914
915 // TODO(b/315998194): Add safety comment
916 #[allow(clippy::undocumented_unsafe_blocks)]
917 let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), &irq_routing[0]) };
918 if ret == 0 {
919 Ok(())
920 } else {
921 errno_result()
922 }
923 }
924
    /// Enable the specified capability.
    /// See documentation for KVM_ENABLE_CAP.
    /// # Safety
    /// This function is marked as unsafe because `cap` may contain values which are interpreted as
    /// pointers by the kernel.
    pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }
940 }
941
impl AsRawDescriptor for Vm {
    /// Returns the raw descriptor of the underlying VM fd.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}
947
/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
#[derive(Debug)]
pub enum VcpuExit {
    /// An out port instruction was run on the given port with the given data.
    IoOut {
        port: u16,
        size: usize,
        data: [u8; 8],
    },
    /// An in port instruction was run on the given port.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    IoIn {
        port: u16,
        size: usize,
    },
    /// A read instruction was run against the given MMIO address.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    MmioRead {
        address: u64,
        size: usize,
    },
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite {
        address: u64,
        size: usize,
        data: [u8; 8],
    },
    /// An end of interrupt was signaled for the given vector on the in-kernel IOAPIC.
    IoapicEoi {
        vector: u8,
    },
    /// A Hyper-V SynIC exit (KVM_EXIT_HYPERV with type KVM_EXIT_HYPERV_SYNIC).
    HypervSynic {
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
    },
    /// A Hyper-V hypercall (KVM_EXIT_HYPERV with type KVM_EXIT_HYPERV_HCALL).
    /// The result should be set with `set_data` before `Vcpu::run` is called again.
    HypervHcall {
        input: u64,
        params: [u64; 2],
    },
    Unknown,
    Exception,
    Hypercall,
    Debug,
    Hlt,
    IrqWindowOpen,
    Shutdown,
    FailEntry {
        hardware_entry_failure_reason: u64,
    },
    Intr,
    SetTpr,
    TprAccess,
    S390Sieic,
    S390Reset,
    Dcr,
    Nmi,
    InternalError,
    Osi,
    PaprHcall,
    S390Ucontrol,
    Watchdog,
    S390Tsch,
    Epr,
    /// The cpu triggered a system level event which is specified by the type field.
    /// The first field is the event type and the second field is flags.
    /// The possible event types are shutdown, reset, or crash. So far there
    /// are not any flags defined.
    SystemEvent(u32 /* event_type */, u64 /* flags */),
}
1022
/// A wrapper around creating and using a VCPU.
/// `Vcpu` provides all functionality except for running. To run, `to_runnable` must be called to
/// lock the vcpu to a thread. Then the returned `RunnableVcpu` can be used for running.
pub struct Vcpu {
    // Owned fd returned by the KVM_CREATE_VCPU ioctl.
    vcpu: File,
    // Mapping of this vcpu's kvm_run structure, sized via `Kvm::get_vcpu_mmap_size`.
    run_mmap: MemoryMapping,
}
1030
/// Per-thread record used by `Vcpu::set_local_immediate_exit` to reach the kvm_run structure of
/// the vcpu currently assigned to this thread.
pub struct VcpuThread {
    // Raw pointer into the vcpu's `run_mmap`; installed by `Vcpu::to_runnable` and cleared by
    // `RunnableVcpu::drop`, so it is valid while the entry is present.
    run: *mut kvm_run,
    // Signal temporarily blocked while the entry is installed/removed; see `to_runnable`.
    signal_num: Option<c_int>,
}

// One optional VcpuThread per OS thread; at most one vcpu may be assigned to a thread.
thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = RefCell::new(None));
1037
1038 impl Vcpu {
1039 /// Constructs a new VCPU for `vm`.
1040 ///
1041 /// The `id` argument is the CPU number between [0, max vcpus).
new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu>1042 pub fn new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu> {
1043 let run_mmap_size = kvm.get_vcpu_mmap_size()?;
1044
1045 // SAFETY:
1046 // Safe because we know that vm a VM fd and we verify the return result.
1047 let vcpu_fd = unsafe { ioctl_with_val(vm, KVM_CREATE_VCPU(), id) };
1048 if vcpu_fd < 0 {
1049 return errno_result();
1050 }
1051
1052 // SAFETY:
1053 // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
1054 // the value of the fd and we own the fd.
1055 let vcpu = unsafe { File::from_raw_descriptor(vcpu_fd) };
1056
1057 let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
1058 .from_file(&vcpu)
1059 .build()
1060 .map_err(|_| Error::new(ENOSPC))?;
1061
1062 Ok(Vcpu { vcpu, run_mmap })
1063 }
1064
1065 /// Consumes `self` and returns a `RunnableVcpu`. A `RunnableVcpu` is required to run the
1066 /// guest.
1067 /// Assigns a vcpu to the current thread and stores it in a hash map that can be used by signal
1068 /// handlers to call set_local_immediate_exit(). An optional signal number will be temporarily
1069 /// blocked while assigning the vcpu to the thread and later blocked when `RunnableVcpu` is
1070 /// destroyed.
1071 ///
1072 /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
1073 #[allow(clippy::cast_ptr_alignment)]
to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu>1074 pub fn to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu> {
1075 // Block signal while we add -- if a signal fires (very unlikely,
1076 // as this means something is trying to pause the vcpu before it has
1077 // even started) it'll try to grab the read lock while this write
1078 // lock is grabbed and cause a deadlock.
1079 // Assuming that a failure to block means it's already blocked.
1080 let _blocked_signal = signal_num.map(BlockedSignal::new);
1081
1082 VCPU_THREAD.with(|v| {
1083 if v.borrow().is_none() {
1084 *v.borrow_mut() = Some(VcpuThread {
1085 run: self.run_mmap.as_ptr() as *mut kvm_run,
1086 signal_num,
1087 });
1088 Ok(())
1089 } else {
1090 Err(Error::new(EBUSY))
1091 }
1092 })?;
1093
1094 Ok(RunnableVcpu {
1095 vcpu: self,
1096 phantom: Default::default(),
1097 })
1098 }
1099
1100 /// Sets the data received by a mmio read, ioport in, or hypercall instruction.
1101 ///
1102 /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`,
1103 /// `VcpuExit::MmioRead`, or 'VcpuExit::HypervHcall`.
1104 #[allow(clippy::cast_ptr_alignment)]
set_data(&self, data: &[u8]) -> Result<()>1105 pub fn set_data(&self, data: &[u8]) -> Result<()> {
1106 // SAFETY:
1107 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1108 // kernel told us how large it was. The pointer is page aligned so casting to a different
1109 // type is well defined, hence the clippy allow attribute.
1110 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1111 match run.exit_reason {
1112 KVM_EXIT_IO => {
1113 let run_start = run as *mut kvm_run as *mut u8;
1114 // SAFETY:
1115 // Safe because the exit_reason (which comes from the kernel) told us which
1116 // union field to use.
1117 let io = unsafe { run.__bindgen_anon_1.io };
1118 if io.direction as u32 != KVM_EXIT_IO_IN {
1119 return Err(Error::new(EINVAL));
1120 }
1121 let data_size = (io.count as usize) * (io.size as usize);
1122 if data_size != data.len() {
1123 return Err(Error::new(EINVAL));
1124 }
1125 // SAFETY:
1126 // The data_offset is defined by the kernel to be some number of bytes into the
1127 // kvm_run structure, which we have fully mmap'd.
1128 unsafe {
1129 let data_ptr = run_start.offset(io.data_offset as isize);
1130 copy_nonoverlapping(data.as_ptr(), data_ptr, data_size);
1131 }
1132 Ok(())
1133 }
1134 KVM_EXIT_MMIO => {
1135 // SAFETY:
1136 // Safe because the exit_reason (which comes from the kernel) told us which
1137 // union field to use.
1138 let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
1139 if mmio.is_write != 0 {
1140 return Err(Error::new(EINVAL));
1141 }
1142 let len = mmio.len as usize;
1143 if len != data.len() {
1144 return Err(Error::new(EINVAL));
1145 }
1146 mmio.data[..len].copy_from_slice(data);
1147 Ok(())
1148 }
1149 KVM_EXIT_HYPERV => {
1150 // SAFETY:
1151 // Safe because the exit_reason (which comes from the kernel) told us which
1152 // union field to use.
1153 let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
1154 if hyperv.type_ != KVM_EXIT_HYPERV_HCALL {
1155 return Err(Error::new(EINVAL));
1156 }
1157 // TODO(b/315998194): Add safety comment
1158 #[allow(clippy::undocumented_unsafe_blocks)]
1159 let hcall = unsafe { &mut hyperv.u.hcall };
1160 match data.try_into() {
1161 Ok(data) => {
1162 hcall.result = u64::from_ne_bytes(data);
1163 }
1164 _ => return Err(Error::new(EINVAL)),
1165 }
1166 Ok(())
1167 }
1168 _ => Err(Error::new(EINVAL)),
1169 }
1170 }
1171
1172 /// Sets the bit that requests an immediate exit.
1173 #[allow(clippy::cast_ptr_alignment)]
set_immediate_exit(&self, exit: bool)1174 pub fn set_immediate_exit(&self, exit: bool) {
1175 // SAFETY:
1176 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1177 // kernel told us how large it was. The pointer is page aligned so casting to a different
1178 // type is well defined, hence the clippy allow attribute.
1179 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1180 run.immediate_exit = exit.into();
1181 }
1182
1183 /// Sets/clears the bit for immediate exit for the vcpu on the current thread.
set_local_immediate_exit(exit: bool)1184 pub fn set_local_immediate_exit(exit: bool) {
1185 VCPU_THREAD.with(|v| {
1186 if let Some(state) = &(*v.borrow()) {
1187 // TODO(b/315998194): Add safety comment
1188 #[allow(clippy::undocumented_unsafe_blocks)]
1189 unsafe {
1190 (*state.run).immediate_exit = exit.into();
1191 };
1192 }
1193 });
1194 }
1195
1196 /// Gets the VCPU registers.
1197 #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
get_regs(&self) -> Result<kvm_regs>1198 pub fn get_regs(&self) -> Result<kvm_regs> {
1199 // SAFETY: trivially safe
1200 let mut regs = unsafe { std::mem::zeroed() };
1201 // SAFETY:
1202 // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1203 // correct amount of memory from our pointer, and we verify the return result.
1204 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
1205 if ret != 0 {
1206 return errno_result();
1207 }
1208 Ok(regs)
1209 }
1210
1211 /// Sets the VCPU registers.
1212 #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
set_regs(&self, regs: &kvm_regs) -> Result<()>1213 pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
1214 // SAFETY:
1215 // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1216 // correct amount of memory from our pointer, and we verify the return result.
1217 let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) };
1218 if ret != 0 {
1219 return errno_result();
1220 }
1221 Ok(())
1222 }
1223
1224 /// Gets the VCPU special registers.
1225 #[cfg(target_arch = "x86_64")]
get_sregs(&self) -> Result<kvm_sregs>1226 pub fn get_sregs(&self) -> Result<kvm_sregs> {
1227 // SAFETY: trivially safe
1228 let mut regs = unsafe { std::mem::zeroed() };
1229 // SAFETY:
1230 // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1231 // correct amount of memory to our pointer, and we verify the return result.
1232 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
1233 if ret != 0 {
1234 return errno_result();
1235 }
1236 Ok(regs)
1237 }
1238
1239 /// Sets the VCPU special registers.
1240 #[cfg(target_arch = "x86_64")]
set_sregs(&self, sregs: &kvm_sregs) -> Result<()>1241 pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
1242 // SAFETY:
1243 // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1244 // correct amount of memory from our pointer, and we verify the return result.
1245 let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) };
1246 if ret != 0 {
1247 return errno_result();
1248 }
1249 Ok(())
1250 }
1251
    /// Gets the VCPU FPU registers.
    #[cfg(target_arch = "x86_64")]
    pub fn get_fpu(&self) -> Result<kvm_fpu> {
        // SAFETY: trivially safe
        let mut regs = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }
1266
1267 /// X86 specific call to setup the FPU
1268 ///
1269 /// See the documentation for KVM_SET_FPU.
1270 #[cfg(target_arch = "x86_64")]
set_fpu(&self, fpu: &kvm_fpu) -> Result<()>1271 pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
1272 let ret = {
1273 // SAFETY:
1274 // Here we trust the kernel not to read past the end of the kvm_fpu struct.
1275 unsafe { ioctl_with_ref(self, KVM_SET_FPU(), fpu) }
1276 };
1277 if ret < 0 {
1278 return errno_result();
1279 }
1280 Ok(())
1281 }
1282
1283 /// Gets the VCPU debug registers.
1284 #[cfg(target_arch = "x86_64")]
get_debugregs(&self) -> Result<kvm_debugregs>1285 pub fn get_debugregs(&self) -> Result<kvm_debugregs> {
1286 // SAFETY: trivially safe
1287 let mut regs = unsafe { std::mem::zeroed() };
1288 // SAFETY:
1289 // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1290 // correct amount of memory to our pointer, and we verify the return result.
1291 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut regs) };
1292 if ret != 0 {
1293 return errno_result();
1294 }
1295 Ok(regs)
1296 }
1297
    /// Sets the VCPU debug registers
    #[cfg(target_arch = "x86_64")]
    pub fn set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_debugregs struct.
            unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS(), dregs) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
1311
1312 /// Gets the VCPU extended control registers
1313 #[cfg(target_arch = "x86_64")]
get_xcrs(&self) -> Result<kvm_xcrs>1314 pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
1315 // SAFETY: trivially safe
1316 let mut regs = unsafe { std::mem::zeroed() };
1317 // SAFETY:
1318 // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1319 // correct amount of memory to our pointer, and we verify the return result.
1320 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut regs) };
1321 if ret != 0 {
1322 return errno_result();
1323 }
1324 Ok(regs)
1325 }
1326
1327 /// Sets the VCPU extended control registers
1328 #[cfg(target_arch = "x86_64")]
set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()>1329 pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
1330 let ret = {
1331 // SAFETY:
1332 // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
1333 unsafe { ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) }
1334 };
1335 if ret < 0 {
1336 return errno_result();
1337 }
1338 Ok(())
1339 }
1340
1341 /// X86 specific call to get the MSRS
1342 ///
1343 /// See the documentation for KVM_SET_MSRS.
1344 #[cfg(target_arch = "x86_64")]
get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()>1345 pub fn get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()> {
1346 let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(msr_entries.len());
1347 {
1348 // SAFETY:
1349 // Mapping the unsized array to a slice is unsafe because the length isn't known.
1350 // Providing the length used to create the struct guarantees the entire slice is valid.
1351 unsafe {
1352 let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(msr_entries.len());
1353 entries.copy_from_slice(msr_entries);
1354 }
1355 }
1356 msrs[0].nmsrs = msr_entries.len() as u32;
1357 let ret = {
1358 // SAFETY:
1359 // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
1360 unsafe { ioctl_with_ref(self, KVM_GET_MSRS(), &msrs[0]) }
1361 };
1362 if ret < 0 {
1363 // KVM_SET_MSRS actually returns the number of msr entries written.
1364 return errno_result();
1365 }
1366 // TODO(b/315998194): Add safety comment
1367 #[allow(clippy::undocumented_unsafe_blocks)]
1368 unsafe {
1369 let count = ret as usize;
1370 assert!(count <= msr_entries.len());
1371 let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(count);
1372 msr_entries.truncate(count);
1373 msr_entries.copy_from_slice(entries);
1374 }
1375 Ok(())
1376 }
1377
1378 /// X86 specific call to setup the MSRS
1379 ///
1380 /// See the documentation for KVM_SET_MSRS.
1381 #[cfg(target_arch = "x86_64")]
set_msrs(&self, msrs: &kvm_msrs) -> Result<()>1382 pub fn set_msrs(&self, msrs: &kvm_msrs) -> Result<()> {
1383 let ret = {
1384 // SAFETY:
1385 // Here we trust the kernel not to read past the end of the kvm_msrs struct.
1386 unsafe { ioctl_with_ref(self, KVM_SET_MSRS(), msrs) }
1387 };
1388 if ret < 0 {
1389 // KVM_SET_MSRS actually returns the number of msr entries written.
1390 return errno_result();
1391 }
1392 Ok(())
1393 }
1394
    /// X86 specific call to setup the CPUID registers
    ///
    /// See the documentation for KVM_SET_CPUID2.
    #[cfg(target_arch = "x86_64")]
    pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
            unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_ptr()) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
1410
1411 /// X86 specific call to get the system emulated hyper-v CPUID values
1412 #[cfg(target_arch = "x86_64")]
get_hyperv_cpuid(&self) -> Result<CpuId>1413 pub fn get_hyperv_cpuid(&self) -> Result<CpuId> {
1414 const MAX_KVM_CPUID_ENTRIES: usize = 256;
1415 let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);
1416
1417 let ret = {
1418 // SAFETY:
1419 // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
1420 // allocated for the struct. The limit is read from nent, which is set to the allocated
1421 // size(MAX_KVM_CPUID_ENTRIES) above.
1422 unsafe { ioctl_with_mut_ptr(self, KVM_GET_SUPPORTED_HV_CPUID(), cpuid.as_mut_ptr()) }
1423 };
1424 if ret < 0 {
1425 return errno_result();
1426 }
1427 Ok(cpuid)
1428 }
1429
1430 /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt
1431 /// Controller".
1432 ///
1433 /// See the documentation for KVM_GET_LAPIC.
1434 #[cfg(target_arch = "x86_64")]
get_lapic(&self) -> Result<kvm_lapic_state>1435 pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
1436 let mut klapic: kvm_lapic_state = Default::default();
1437
1438 let ret = {
1439 // SAFETY:
1440 // The ioctl is unsafe unless you trust the kernel not to write past the end of the
1441 // local_apic struct.
1442 unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) }
1443 };
1444 if ret < 0 {
1445 return errno_result();
1446 }
1447 Ok(klapic)
1448 }
1449
1450 /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt
1451 /// Controller".
1452 ///
1453 /// See the documentation for KVM_SET_LAPIC.
1454 #[cfg(target_arch = "x86_64")]
set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()>1455 pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
1456 let ret = {
1457 // SAFETY:
1458 // The ioctl is safe because the kernel will only read from the klapic struct.
1459 unsafe { ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) }
1460 };
1461 if ret < 0 {
1462 return errno_result();
1463 }
1464 Ok(())
1465 }
1466
1467 /// Gets the vcpu's current "multiprocessing state".
1468 ///
1469 /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
1470 /// a call to `Vm::create_irq_chip`.
1471 ///
1472 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1473 /// to run crosvm on s390.
1474 #[cfg(target_arch = "x86_64")]
get_mp_state(&self) -> Result<kvm_mp_state>1475 pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
1476 // SAFETY: trivially safe
1477 let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
1478 // SAFETY:
1479 // Safe because we know that our file is a VCPU fd, we know the kernel will only
1480 // write correct amount of memory to our pointer, and we verify the return result.
1481 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut state) };
1482 if ret < 0 {
1483 return errno_result();
1484 }
1485 Ok(state)
1486 }
1487
1488 /// Sets the vcpu's current "multiprocessing state".
1489 ///
1490 /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
1491 /// a call to `Vm::create_irq_chip`.
1492 ///
1493 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1494 /// to run crosvm on s390.
1495 #[cfg(target_arch = "x86_64")]
set_mp_state(&self, state: &kvm_mp_state) -> Result<()>1496 pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
1497 let ret = {
1498 // SAFETY:
1499 // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
1500 unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), state) }
1501 };
1502 if ret < 0 {
1503 return errno_result();
1504 }
1505 Ok(())
1506 }
1507
1508 /// Gets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1509 ///
1510 /// See the documentation for KVM_GET_VCPU_EVENTS.
1511 #[cfg(target_arch = "x86_64")]
get_vcpu_events(&self) -> Result<kvm_vcpu_events>1512 pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
1513 // SAFETY: trivially safe
1514 let mut events: kvm_vcpu_events = unsafe { std::mem::zeroed() };
1515 // SAFETY:
1516 // Safe because we know that our file is a VCPU fd, we know the kernel
1517 // will only write correct amount of memory to our pointer, and we
1518 // verify the return result.
1519 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut events) };
1520 if ret < 0 {
1521 return errno_result();
1522 }
1523 Ok(events)
1524 }
1525
1526 /// Sets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1527 ///
1528 /// See the documentation for KVM_SET_VCPU_EVENTS.
1529 #[cfg(target_arch = "x86_64")]
set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()>1530 pub fn set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()> {
1531 let ret = {
1532 // SAFETY:
1533 // The ioctl is safe because the kernel will only read from the
1534 // kvm_vcpu_events.
1535 unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), events) }
1536 };
1537 if ret < 0 {
1538 return errno_result();
1539 }
1540 Ok(())
1541 }
1542
1543 /// Enable the specified capability.
1544 /// See documentation for KVM_ENABLE_CAP.
1545 /// # Safety
1546 /// This function is marked as unsafe because `cap` may contain values which are interpreted as
1547 /// pointers by the kernel.
kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()>1548 pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
1549 // SAFETY:
1550 // Safe because we allocated the struct and we know the kernel will read exactly the size of
1551 // the struct.
1552 let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
1553 if ret < 0 {
1554 return errno_result();
1555 }
1556 Ok(())
1557 }
1558
    /// Specifies set of signals that are blocked during execution of KVM_RUN.
    /// Signals that are not blocked will cause KVM_RUN to return with -EINTR.
    ///
    /// See the documentation for KVM_SET_SIGNAL_MASK
    pub fn set_signal_mask(&self, signals: &[c_int]) -> Result<()> {
        let sigset = signal::create_sigset(signals)?;

        let mut kvm_sigmask = vec_with_array_field::<kvm_signal_mask, sigset_t>(1);
        // Rust definition of sigset_t takes 128 bytes, but the kernel only
        // expects 8-bytes structure, so we can't write
        // kvm_sigmask.len = size_of::<sigset_t>() as u32;
        kvm_sigmask[0].len = 8;
        // Ensure the length is not too big.
        // (Intended as a compile-time check: const evaluation of this subtraction fails if
        // sigset_t were ever smaller than the 8 bytes copied below.)
        const _ASSERT: usize = size_of::<sigset_t>() - 8usize;

        // SAFETY:
        // Safe as we allocated exactly the needed space
        unsafe {
            copy_nonoverlapping(
                &sigset as *const sigset_t as *const u8,
                kvm_sigmask[0].sigset.as_mut_ptr(),
                8,
            );
        }

        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the
            // kvm_signal_mask structure.
            unsafe { ioctl_with_ref(self, KVM_SET_SIGNAL_MASK(), &kvm_sigmask[0]) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
1595
1596 /// Sets the value of one register on this VCPU. The id of the register is
1597 /// encoded as specified in the kernel documentation for KVM_SET_ONE_REG.
1598 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
set_one_reg(&self, reg_id: u64, data: u64) -> Result<()>1599 pub fn set_one_reg(&self, reg_id: u64, data: u64) -> Result<()> {
1600 let data_ref = &data as *const u64;
1601 let onereg = kvm_one_reg {
1602 id: reg_id,
1603 addr: data_ref as u64,
1604 };
1605 // SAFETY:
1606 // safe because we allocated the struct and we know the kernel will read
1607 // exactly the size of the struct
1608 let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) };
1609 if ret < 0 {
1610 return errno_result();
1611 }
1612 Ok(())
1613 }
1614 }
1615
impl AsRawDescriptor for Vcpu {
    /// Returns the raw descriptor of the underlying VCPU fd.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}
1621
/// A Vcpu that has a thread and can be run. Created by calling `to_runnable` on a `Vcpu`.
/// Implements `Deref` to a `Vcpu` so all `Vcpu` methods are usable, with the addition of the `run`
/// function to execute the guest.
pub struct RunnableVcpu {
    // The wrapped vcpu; its thread-local assignment is cleared when this struct is dropped.
    vcpu: Vcpu,
    // vcpus must stay on the same thread once they start.
    // Add the PhantomData pointer to ensure RunnableVcpu is not `Send`.
    phantom: std::marker::PhantomData<*mut u8>,
}
1631
1632 impl RunnableVcpu {
1633 /// Runs the VCPU until it exits, returning the reason for the exit.
1634 ///
1635 /// Note that the state of the VCPU and associated VM must be setup first for this to do
1636 /// anything useful.
1637 #[allow(clippy::cast_ptr_alignment)]
1638 // The pointer is page aligned so casting to a different type is well defined, hence the clippy
1639 // allow attribute.
run(&self) -> Result<VcpuExit>1640 pub fn run(&self) -> Result<VcpuExit> {
1641 // SAFETY:
1642 // Safe because we know that our file is a VCPU fd and we verify the return result.
1643 let ret = unsafe { ioctl(self, KVM_RUN()) };
1644 if ret == 0 {
1645 // SAFETY:
1646 // Safe because we know we mapped enough memory to hold the kvm_run struct because the
1647 // kernel told us how large it was.
1648 let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
1649 match run.exit_reason {
1650 KVM_EXIT_IO => {
1651 // SAFETY:
1652 // Safe because the exit_reason (which comes from the kernel) told us which
1653 // union field to use.
1654 let io = unsafe { run.__bindgen_anon_1.io };
1655 let port = io.port;
1656 let size = (io.count as usize) * (io.size as usize);
1657 match io.direction as u32 {
1658 KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
1659 KVM_EXIT_IO_OUT => {
1660 let mut data = [0; 8];
1661 let run_start = run as *const kvm_run as *const u8;
1662 // SAFETY:
1663 // The data_offset is defined by the kernel to be some number of bytes
1664 // into the kvm_run structure, which we have fully mmap'd.
1665 unsafe {
1666 let data_ptr = run_start.offset(io.data_offset as isize);
1667 copy_nonoverlapping(
1668 data_ptr,
1669 data.as_mut_ptr(),
1670 min(size, data.len()),
1671 );
1672 }
1673 Ok(VcpuExit::IoOut { port, size, data })
1674 }
1675 _ => Err(Error::new(EINVAL)),
1676 }
1677 }
1678 KVM_EXIT_MMIO => {
1679 // SAFETY:
1680 // Safe because the exit_reason (which comes from the kernel) told us which
1681 // union field to use.
1682 let mmio = unsafe { &run.__bindgen_anon_1.mmio };
1683 let address = mmio.phys_addr;
1684 let size = min(mmio.len as usize, mmio.data.len());
1685 if mmio.is_write != 0 {
1686 Ok(VcpuExit::MmioWrite {
1687 address,
1688 size,
1689 data: mmio.data,
1690 })
1691 } else {
1692 Ok(VcpuExit::MmioRead { address, size })
1693 }
1694 }
1695 KVM_EXIT_IOAPIC_EOI => {
1696 // SAFETY:
1697 // Safe because the exit_reason (which comes from the kernel) told us which
1698 // union field to use.
1699 let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
1700 Ok(VcpuExit::IoapicEoi { vector })
1701 }
1702 KVM_EXIT_HYPERV => {
1703 // SAFETY:
1704 // Safe because the exit_reason (which comes from the kernel) told us which
1705 // union field to use.
1706 let hyperv = unsafe { &run.__bindgen_anon_1.hyperv };
1707 match hyperv.type_ {
1708 KVM_EXIT_HYPERV_SYNIC => {
1709 // TODO(b/315998194): Add safety comment
1710 #[allow(clippy::undocumented_unsafe_blocks)]
1711 let synic = unsafe { &hyperv.u.synic };
1712 Ok(VcpuExit::HypervSynic {
1713 msr: synic.msr,
1714 control: synic.control,
1715 evt_page: synic.evt_page,
1716 msg_page: synic.msg_page,
1717 })
1718 }
1719 KVM_EXIT_HYPERV_HCALL => {
1720 // TODO(b/315998194): Add safety comment
1721 #[allow(clippy::undocumented_unsafe_blocks)]
1722 let hcall = unsafe { &hyperv.u.hcall };
1723 Ok(VcpuExit::HypervHcall {
1724 input: hcall.input,
1725 params: hcall.params,
1726 })
1727 }
1728 _ => Err(Error::new(EINVAL)),
1729 }
1730 }
1731 KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
1732 KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
1733 KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
1734 KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
1735 KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
1736 KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
1737 KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
1738 KVM_EXIT_FAIL_ENTRY => {
1739 // SAFETY:
1740 // Safe because the exit_reason (which comes from the kernel) told us which
1741 // union field to use.
1742 let hardware_entry_failure_reason = unsafe {
1743 run.__bindgen_anon_1
1744 .fail_entry
1745 .hardware_entry_failure_reason
1746 };
1747 Ok(VcpuExit::FailEntry {
1748 hardware_entry_failure_reason,
1749 })
1750 }
1751 KVM_EXIT_INTR => Ok(VcpuExit::Intr),
1752 KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
1753 KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
1754 KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
1755 KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
1756 KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
1757 KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
1758 KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
1759 KVM_EXIT_OSI => Ok(VcpuExit::Osi),
1760 KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
1761 KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
1762 KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
1763 KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
1764 KVM_EXIT_EPR => Ok(VcpuExit::Epr),
1765 KVM_EXIT_SYSTEM_EVENT => {
1766 let event_type = {
1767 // SAFETY:
1768 // Safe because we know the exit reason told us this union
1769 // field is valid
1770 unsafe { run.__bindgen_anon_1.system_event.type_ }
1771 };
1772 // TODO(b/315998194): Add safety comment
1773 #[allow(clippy::undocumented_unsafe_blocks)]
1774 let event_flags =
1775 unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
1776 Ok(VcpuExit::SystemEvent(event_type, event_flags))
1777 }
1778 r => panic!("unknown kvm exit reason: {}", r),
1779 }
1780 } else {
1781 errno_result()
1782 }
1783 }
1784 }
1785
impl Deref for RunnableVcpu {
    type Target = Vcpu;
    /// Exposes all `Vcpu` methods on a `RunnableVcpu`.
    fn deref(&self) -> &Self::Target {
        &self.vcpu
    }
}
1792
impl DerefMut for RunnableVcpu {
    /// Mutable counterpart of `Deref`; gives access to the wrapped `Vcpu`.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.vcpu
    }
}
1798
impl AsRawDescriptor for RunnableVcpu {
    /// Returns the raw descriptor of the wrapped vcpu's fd.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}
1804
impl Drop for RunnableVcpu {
    // Clears this thread's VCPU_THREAD entry so the thread can host another vcpu, blocking the
    // registered signal (if any) while the entry is removed.
    fn drop(&mut self) {
        VCPU_THREAD.with(|v| {
            // This assumes that a failure in `BlockedSignal::new` means the signal is already
            // blocked and therefore it should not be unblocked on exit.
            let _blocked_signal = &(*v.borrow())
                .as_ref()
                .and_then(|state| state.signal_num)
                .map(BlockedSignal::new);

            *v.borrow_mut() = None;
        });
    }
}
1819
/// Wrapper for kvm_cpuid2 which has a zero length array at the end.
/// Hides the zero length array behind a bounds check.
///
/// Used by `Vcpu::set_cpuid2` and `Vcpu::get_hyperv_cpuid`.
#[cfg(target_arch = "x86_64")]
pub type CpuId = FlexibleArrayWrapper<kvm_cpuid2, kvm_cpuid_entry2>;
1824