1 // Copyright 2020 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 //! A crate for abstracting the underlying kernel hypervisor used in crosvm. 6 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 7 pub mod aarch64; 8 pub mod caps; 9 pub mod kvm; 10 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 11 pub mod x86_64; 12 13 use std::os::raw::c_int; 14 use std::os::unix::io::AsRawFd; 15 16 use serde::{Deserialize, Serialize}; 17 18 use base::{Event, MappedRegion, Protection, Result, SafeDescriptor}; 19 use vm_memory::{GuestAddress, GuestMemory}; 20 21 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 22 pub use crate::aarch64::*; 23 pub use crate::caps::*; 24 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 25 pub use crate::x86_64::*; 26 27 /// An index in the list of guest-mapped memory regions. 28 pub type MemSlot = u32; 29 30 /// A trait for checking hypervisor capabilities. 31 pub trait Hypervisor: Send { 32 /// Makes a shallow clone of this `Hypervisor`. try_clone(&self) -> Result<Self> where Self: Sized33 fn try_clone(&self) -> Result<Self> 34 where 35 Self: Sized; 36 37 /// Checks if a particular `HypervisorCap` is available. check_capability(&self, cap: &HypervisorCap) -> bool38 fn check_capability(&self, cap: &HypervisorCap) -> bool; 39 } 40 41 /// A wrapper for using a VM and getting/setting its state. 42 pub trait Vm: Send { 43 /// Makes a shallow clone of this `Vm`. try_clone(&self) -> Result<Self> where Self: Sized44 fn try_clone(&self) -> Result<Self> 45 where 46 Self: Sized; 47 48 /// Checks if a particular `VmCap` is available. 49 /// 50 /// This is distinct from the `Hypervisor` version of this method because some extensions depend 51 /// on the particular `Vm` instance. This method is encouraged because it more accurately 52 /// reflects the usable capabilities. check_capability(&self, c: VmCap) -> bool53 fn check_capability(&self, c: VmCap) -> bool; 54 55 /// Gets the guest-mapped memory for the Vm. get_memory(&self) -> &GuestMemory56 fn get_memory(&self) -> &GuestMemory; 57 58 /// Inserts the given `MappedRegion` into the VM's address space at `guest_addr`. 59 /// 60 /// The slot that was assigned the memory mapping is returned on success. The slot can be given 61 /// to `Vm::remove_memory_region` to remove the memory from the VM's address space and take back 62 /// ownership of `mem_region`. 63 /// 64 /// Note that memory inserted into the VM's address space must not overlap with any other memory 65 /// slot's region. 66 /// 67 /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to 68 /// write will trigger a mmio VM exit, leaving the memory untouched. 69 /// 70 /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to 71 /// by the guest with `get_dirty_log`. add_memory_region( &mut self, guest_addr: GuestAddress, mem_region: Box<dyn MappedRegion>, read_only: bool, log_dirty_pages: bool, ) -> Result<MemSlot>72 fn add_memory_region( 73 &mut self, 74 guest_addr: GuestAddress, 75 mem_region: Box<dyn MappedRegion>, 76 read_only: bool, 77 log_dirty_pages: bool, 78 ) -> Result<MemSlot>; 79 80 /// Does a synchronous msync of the memory mapped at `slot`, syncing `size` bytes starting at 81 /// `offset` from the start of the region. `offset` must be page aligned. msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>82 fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>; 83 84 /// Removes and drops the `UserMemoryRegion` that was previously added at the given slot. remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>85 fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>; 86 87 /// Creates an emulated device. create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>88 fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>; 89 90 /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at 91 /// `slot`. Only works on VMs that support `VmCap::DirtyLog`. 92 /// 93 /// The size of `dirty_log` must be at least as many bits as there are pages in the memory 94 /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must 95 /// be 2 bytes or greater. get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>96 fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>; 97 98 /// Registers an event to be signaled whenever a certain address is written to. 99 /// 100 /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the 101 /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important 102 /// and must match the expected size of the guest's write. 103 /// 104 /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be 105 /// triggered is prevented. register_ioevent( &mut self, evt: &Event, addr: IoEventAddress, datamatch: Datamatch, ) -> Result<()>106 fn register_ioevent( 107 &mut self, 108 evt: &Event, 109 addr: IoEventAddress, 110 datamatch: Datamatch, 111 ) -> Result<()>; 112 113 /// Unregisters an event previously registered with `register_ioevent`. 114 /// 115 /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into 116 /// `register_ioevent`. unregister_ioevent( &mut self, evt: &Event, addr: IoEventAddress, datamatch: Datamatch, ) -> Result<()>117 fn unregister_ioevent( 118 &mut self, 119 evt: &Event, 120 addr: IoEventAddress, 121 datamatch: Datamatch, 122 ) -> Result<()>; 123 124 /// Trigger any matching registered io events based on an MMIO or PIO write at `addr`. The 125 /// `data` slice represents the contents and length of the write, which is used to compare with 126 /// the registered io events' Datamatch values. If the hypervisor does in-kernel IO event 127 /// delivery, this is a no-op. handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>128 fn handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>; 129 130 /// Retrieves the current timestamp of the paravirtual clock as seen by the current guest. 131 /// Only works on VMs that support `VmCap::PvClock`. get_pvclock(&self) -> Result<ClockState>132 fn get_pvclock(&self) -> Result<ClockState>; 133 134 /// Sets the current timestamp of the paravirtual clock as seen by the current guest. 135 /// Only works on VMs that support `VmCap::PvClock`. set_pvclock(&self, state: &ClockState) -> Result<()>136 fn set_pvclock(&self, state: &ClockState) -> Result<()>; 137 138 /// Maps `size` bytes starting at `fs_offset` bytes from within the given `fd` 139 /// at `offset` bytes from the start of the arena with `prot` protections. 140 /// `offset` must be page aligned. 141 /// 142 /// # Arguments 143 /// * `offset` - Page aligned offset into the arena in bytes. 144 /// * `size` - Size of memory region in bytes. 145 /// * `fd` - File descriptor to mmap from. 146 /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap. 147 /// * `prot` - Protection (e.g. readable/writable) of the memory region. add_fd_mapping( &mut self, slot: u32, offset: usize, size: usize, fd: &dyn AsRawFd, fd_offset: u64, prot: Protection, ) -> Result<()>148 fn add_fd_mapping( 149 &mut self, 150 slot: u32, 151 offset: usize, 152 size: usize, 153 fd: &dyn AsRawFd, 154 fd_offset: u64, 155 prot: Protection, 156 ) -> Result<()>; 157 158 /// Remove `size`-byte mapping starting at `offset`. remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>159 fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>; 160 } 161 162 /// A unique fingerprint for a particular `VcpuRunHandle`, used in `Vcpu` impls to ensure the 163 /// `VcpuRunHandle ` they receive is the same one that was returned from `take_run_handle`. 164 #[derive(Clone, PartialEq, Eq)] 165 pub struct VcpuRunHandleFingerprint(u64); 166 167 impl VcpuRunHandleFingerprint { as_u64(&self) -> u64168 pub fn as_u64(&self) -> u64 { 169 self.0 170 } 171 } 172 173 /// A handle returned by a `Vcpu` to be used with `Vcpu::run` to execute a virtual machine's VCPU. 174 /// 175 /// This is used to ensure that the caller has bound the `Vcpu` to a thread with 176 /// `Vcpu::take_run_handle` and to execute hypervisor specific cleanup routines when dropped. 177 pub struct VcpuRunHandle { 178 drop_fn: fn(), 179 fingerprint: VcpuRunHandleFingerprint, 180 // Prevents Send+Sync for this type. 181 phantom: std::marker::PhantomData<*mut ()>, 182 } 183 184 impl VcpuRunHandle { 185 /// Used by `Vcpu` impls to create a unique run handle, that when dropped, will call the given 186 /// `drop_fn`. new(drop_fn: fn()) -> Self187 pub fn new(drop_fn: fn()) -> Self { 188 // Creates a probably unique number with a hash of the current thread id and epoch time. 189 use std::hash::{Hash, Hasher}; 190 let mut hasher = std::collections::hash_map::DefaultHasher::new(); 191 std::time::Instant::now().hash(&mut hasher); 192 std::thread::current().id().hash(&mut hasher); 193 Self { 194 drop_fn, 195 fingerprint: VcpuRunHandleFingerprint(hasher.finish()), 196 phantom: std::marker::PhantomData, 197 } 198 } 199 200 /// Gets the unique fingerprint which may be copied and compared freely. fingerprint(&self) -> &VcpuRunHandleFingerprint201 pub fn fingerprint(&self) -> &VcpuRunHandleFingerprint { 202 &self.fingerprint 203 } 204 } 205 206 impl Drop for VcpuRunHandle { drop(&mut self)207 fn drop(&mut self) { 208 (self.drop_fn)(); 209 } 210 } 211 212 /// A virtual CPU holding a virtualized hardware thread's state, such as registers and interrupt 213 /// state, which may be used to execute virtual machines. 214 /// 215 /// To run, `take_run_handle` must be called to lock the vcpu to a thread. Then the returned 216 /// `VcpuRunHandle` can be used for running. 217 pub trait Vcpu: downcast_rs::DowncastSync { 218 /// Makes a shallow clone of this `Vcpu`. try_clone(&self) -> Result<Self> where Self: Sized219 fn try_clone(&self) -> Result<Self> 220 where 221 Self: Sized; 222 223 /// Casts this architecture specific trait object to the base trait object `Vcpu`. as_vcpu(&self) -> &dyn Vcpu224 fn as_vcpu(&self) -> &dyn Vcpu; 225 226 /// Returns a unique `VcpuRunHandle`. A `VcpuRunHandle` is required to run the guest. 227 /// 228 /// Assigns a vcpu to the current thread so that signal handlers can call 229 /// set_local_immediate_exit(). An optional signal number will be temporarily blocked while 230 /// assigning the vcpu to the thread and later blocked when `VcpuRunHandle` is destroyed. 231 /// 232 /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu. take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>233 fn take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>; 234 235 /// Runs the VCPU until it exits, returning the reason for the exit. 236 /// 237 /// Note that the state of the VCPU and associated VM must be setup first for this to do 238 /// anything useful. The given `run_handle` must be the same as the one returned by 239 /// `take_run_handle` for this `Vcpu`. run(&self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>240 fn run(&self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>; 241 242 /// Returns the vcpu id. id(&self) -> usize243 fn id(&self) -> usize; 244 245 /// Sets the bit that requests an immediate exit. set_immediate_exit(&self, exit: bool)246 fn set_immediate_exit(&self, exit: bool); 247 248 /// Sets/clears the bit for immediate exit for the vcpu on the current thread. set_local_immediate_exit(exit: bool) where Self: Sized249 fn set_local_immediate_exit(exit: bool) 250 where 251 Self: Sized; 252 253 /// Returns a function pointer that invokes `set_local_immediate_exit` in a 254 /// signal-safe way when called. set_local_immediate_exit_fn(&self) -> extern "C" fn()255 fn set_local_immediate_exit_fn(&self) -> extern "C" fn(); 256 257 /// Sets the data received by a mmio read, ioport in, or hypercall instruction. 258 /// 259 /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`, 260 /// `VcpuExit::MmioRead`, or 'VcpuExit::HypervHcall`. set_data(&self, data: &[u8]) -> Result<()>261 fn set_data(&self, data: &[u8]) -> Result<()>; 262 263 /// Signals to the hypervisor that this guest is being paused by userspace. Only works on Vms 264 /// that support `VmCap::PvClockSuspend`. pvclock_ctrl(&self) -> Result<()>265 fn pvclock_ctrl(&self) -> Result<()>; 266 267 /// Specifies set of signals that are blocked during execution of `RunnableVcpu::run`. Signals 268 /// that are not blocked will cause run to return with `VcpuExit::Intr`. Only works on Vms that 269 /// support `VmCap::SignalMask`. set_signal_mask(&self, signals: &[c_int]) -> Result<()>270 fn set_signal_mask(&self, signals: &[c_int]) -> Result<()>; 271 272 /// Enables a hypervisor-specific extension on this Vcpu. `cap` is a constant defined by the 273 /// hypervisor API (e.g., kvm.h). `args` are the arguments for enabling the feature, if any. 274 /// 275 /// # Safety 276 /// This function is marked as unsafe because `args` may be interpreted as pointers for some 277 /// capabilities. The caller must ensure that any pointers passed in the `args` array are 278 /// allocated as the kernel expects, and that mutable pointers are owned. enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>279 unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>; 280 } 281 282 downcast_rs::impl_downcast!(sync Vcpu); 283 284 /// An address either in programmable I/O space or in memory mapped I/O space. 285 #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, std::hash::Hash)] 286 pub enum IoEventAddress { 287 Pio(u64), 288 Mmio(u64), 289 } 290 291 /// Used in `Vm::register_ioevent` to indicate a size and optionally value to match. 292 #[derive(PartialEq, Eq)] 293 pub enum Datamatch { 294 AnyLength, 295 U8(Option<u8>), 296 U16(Option<u16>), 297 U32(Option<u32>), 298 U64(Option<u64>), 299 } 300 301 /// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called. 302 #[derive(Debug)] 303 pub enum VcpuExit { 304 /// An out port instruction was run on the given port with the given data. 305 IoOut { 306 port: u16, 307 size: usize, 308 data: [u8; 8], 309 }, 310 /// An in port instruction was run on the given port. 311 /// 312 /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is 313 /// called again. 314 IoIn { 315 port: u16, 316 size: usize, 317 }, 318 /// A read instruction was run against the given MMIO address. 319 /// 320 /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is 321 /// called again. 322 MmioRead { 323 address: u64, 324 size: usize, 325 }, 326 /// A write instruction was run against the given MMIO address with the given data. 327 MmioWrite { 328 address: u64, 329 size: usize, 330 data: [u8; 8], 331 }, 332 IoapicEoi { 333 vector: u8, 334 }, 335 HypervSynic { 336 msr: u32, 337 control: u64, 338 evt_page: u64, 339 msg_page: u64, 340 }, 341 HypervHcall { 342 input: u64, 343 params: [u64; 2], 344 }, 345 Unknown, 346 Exception, 347 Hypercall, 348 Debug, 349 Hlt, 350 IrqWindowOpen, 351 Shutdown, 352 FailEntry { 353 hardware_entry_failure_reason: u64, 354 }, 355 Intr, 356 SetTpr, 357 TprAccess, 358 S390Sieic, 359 S390Reset, 360 Dcr, 361 Nmi, 362 InternalError, 363 Osi, 364 PaprHcall, 365 S390Ucontrol, 366 Watchdog, 367 S390Tsch, 368 Epr, 369 /// The cpu triggered a system level event which is specified by the type field. 370 /// The first field is the event type and the second field is flags. 371 /// The possible event types are shutdown, reset, or crash. So far there 372 /// are not any flags defined. 373 SystemEvent(u32 /* event_type */, u64 /* flags */), 374 } 375 376 /// A device type to create with `Vm.create_device`. 377 #[derive(Clone, Copy, Debug, PartialEq)] 378 pub enum DeviceKind { 379 /// VFIO device for direct access to devices from userspace 380 Vfio, 381 /// ARM virtual general interrupt controller v2 382 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 383 ArmVgicV2, 384 /// ARM virtual general interrupt controller v3 385 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 386 ArmVgicV3, 387 } 388 389 /// The source chip of an `IrqSource` 390 #[repr(C)] 391 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 392 pub enum IrqSourceChip { 393 PicPrimary, 394 PicSecondary, 395 Ioapic, 396 Gic, 397 } 398 399 /// A source of IRQs in an `IrqRoute`. 400 #[repr(C)] 401 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 402 pub enum IrqSource { 403 Irqchip { chip: IrqSourceChip, pin: u32 }, 404 Msi { address: u64, data: u32 }, 405 } 406 407 /// A single route for an IRQ. 408 #[repr(C)] 409 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 410 pub struct IrqRoute { 411 pub gsi: u32, 412 pub source: IrqSource, 413 } 414 415 /// The state of the paravirtual clock. 416 #[derive(Debug, Default, Copy, Clone)] 417 pub struct ClockState { 418 /// Current pv clock timestamp, as seen by the guest 419 pub clock: u64, 420 /// Hypervisor-specific feature flags for the pv clock 421 pub flags: u32, 422 } 423 424 /// The MPState represents the state of a processor. 425 #[repr(C)] 426 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 427 pub enum MPState { 428 /// the vcpu is currently running (x86/x86_64,arm/arm64) 429 Runnable, 430 /// the vcpu is an application processor (AP) which has not yet received an INIT signal 431 /// (x86/x86_64) 432 Uninitialized, 433 /// the vcpu has received an INIT signal, and is now ready for a SIPI (x86/x86_64) 434 InitReceived, 435 /// the vcpu has executed a HLT instruction and is waiting for an interrupt (x86/x86_64) 436 Halted, 437 /// the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) (x86/x86_64) 438 SipiReceived, 439 /// the vcpu is stopped (arm/arm64) 440 Stopped, 441 } 442