1 // Copyright 2020 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 //! A crate for abstracting the underlying kernel hypervisor used in crosvm. 6 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 7 pub mod aarch64; 8 pub mod caps; 9 pub mod kvm; 10 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 11 pub mod x86_64; 12 13 use std::os::raw::c_int; 14 15 use serde::{Deserialize, Serialize}; 16 17 use base::{AsRawDescriptor, Event, MappedRegion, Protection, Result, SafeDescriptor}; 18 use vm_memory::{GuestAddress, GuestMemory}; 19 20 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 21 pub use crate::aarch64::*; 22 pub use crate::caps::*; 23 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 24 pub use crate::x86_64::*; 25 26 /// An index in the list of guest-mapped memory regions. 27 pub type MemSlot = u32; 28 29 /// A trait for checking hypervisor capabilities. 30 pub trait Hypervisor: Send { 31 /// Makes a shallow clone of this `Hypervisor`. try_clone(&self) -> Result<Self> where Self: Sized32 fn try_clone(&self) -> Result<Self> 33 where 34 Self: Sized; 35 36 /// Checks if a particular `HypervisorCap` is available. check_capability(&self, cap: HypervisorCap) -> bool37 fn check_capability(&self, cap: HypervisorCap) -> bool; 38 } 39 40 /// A wrapper for using a VM and getting/setting its state. 41 pub trait Vm: Send { 42 /// Makes a shallow clone of this `Vm`. try_clone(&self) -> Result<Self> where Self: Sized43 fn try_clone(&self) -> Result<Self> 44 where 45 Self: Sized; 46 47 /// Checks if a particular `VmCap` is available. 48 /// 49 /// This is distinct from the `Hypervisor` version of this method because some extensions depend 50 /// on the particular `Vm` instance. This method is encouraged because it more accurately 51 /// reflects the usable capabilities. check_capability(&self, c: VmCap) -> bool52 fn check_capability(&self, c: VmCap) -> bool; 53 54 /// Get the guest physical address size in bits. get_guest_phys_addr_bits(&self) -> u855 fn get_guest_phys_addr_bits(&self) -> u8; 56 57 /// Gets the guest-mapped memory for the Vm. get_memory(&self) -> &GuestMemory58 fn get_memory(&self) -> &GuestMemory; 59 60 /// Inserts the given `MappedRegion` into the VM's address space at `guest_addr`. 61 /// 62 /// The slot that was assigned the memory mapping is returned on success. The slot can be given 63 /// to `Vm::remove_memory_region` to remove the memory from the VM's address space and take back 64 /// ownership of `mem_region`. 65 /// 66 /// Note that memory inserted into the VM's address space must not overlap with any other memory 67 /// slot's region. 68 /// 69 /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to 70 /// write will trigger a mmio VM exit, leaving the memory untouched. 71 /// 72 /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to 73 /// by the guest with `get_dirty_log`. add_memory_region( &mut self, guest_addr: GuestAddress, mem_region: Box<dyn MappedRegion>, read_only: bool, log_dirty_pages: bool, ) -> Result<MemSlot>74 fn add_memory_region( 75 &mut self, 76 guest_addr: GuestAddress, 77 mem_region: Box<dyn MappedRegion>, 78 read_only: bool, 79 log_dirty_pages: bool, 80 ) -> Result<MemSlot>; 81 82 /// Does a synchronous msync of the memory mapped at `slot`, syncing `size` bytes starting at 83 /// `offset` from the start of the region. `offset` must be page aligned. msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>84 fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>; 85 86 /// Removes and drops the `UserMemoryRegion` that was previously added at the given slot. remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>87 fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>; 88 89 /// Creates an emulated device. create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>90 fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>; 91 92 /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at 93 /// `slot`. Only works on VMs that support `VmCap::DirtyLog`. 94 /// 95 /// The size of `dirty_log` must be at least as many bits as there are pages in the memory 96 /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must 97 /// be 2 bytes or greater. get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>98 fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>; 99 100 /// Registers an event to be signaled whenever a certain address is written to. 101 /// 102 /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the 103 /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important 104 /// and must match the expected size of the guest's write. 105 /// 106 /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be 107 /// triggered is prevented. register_ioevent( &mut self, evt: &Event, addr: IoEventAddress, datamatch: Datamatch, ) -> Result<()>108 fn register_ioevent( 109 &mut self, 110 evt: &Event, 111 addr: IoEventAddress, 112 datamatch: Datamatch, 113 ) -> Result<()>; 114 115 /// Unregisters an event previously registered with `register_ioevent`. 116 /// 117 /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into 118 /// `register_ioevent`. unregister_ioevent( &mut self, evt: &Event, addr: IoEventAddress, datamatch: Datamatch, ) -> Result<()>119 fn unregister_ioevent( 120 &mut self, 121 evt: &Event, 122 addr: IoEventAddress, 123 datamatch: Datamatch, 124 ) -> Result<()>; 125 126 /// Trigger any matching registered io events based on an MMIO or PIO write at `addr`. The 127 /// `data` slice represents the contents and length of the write, which is used to compare with 128 /// the registered io events' Datamatch values. If the hypervisor does in-kernel IO event 129 /// delivery, this is a no-op. handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>130 fn handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>; 131 132 /// Retrieves the current timestamp of the paravirtual clock as seen by the current guest. 133 /// Only works on VMs that support `VmCap::PvClock`. get_pvclock(&self) -> Result<ClockState>134 fn get_pvclock(&self) -> Result<ClockState>; 135 136 /// Sets the current timestamp of the paravirtual clock as seen by the current guest. 137 /// Only works on VMs that support `VmCap::PvClock`. set_pvclock(&self, state: &ClockState) -> Result<()>138 fn set_pvclock(&self, state: &ClockState) -> Result<()>; 139 140 /// Maps `size` bytes starting at `fs_offset` bytes from within the given `fd` 141 /// at `offset` bytes from the start of the arena with `prot` protections. 142 /// `offset` must be page aligned. 143 /// 144 /// # Arguments 145 /// * `offset` - Page aligned offset into the arena in bytes. 146 /// * `size` - Size of memory region in bytes. 147 /// * `fd` - File descriptor to mmap from. 148 /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap. 149 /// * `prot` - Protection (e.g. readable/writable) of the memory region. add_fd_mapping( &mut self, slot: u32, offset: usize, size: usize, fd: &dyn AsRawDescriptor, fd_offset: u64, prot: Protection, ) -> Result<()>150 fn add_fd_mapping( 151 &mut self, 152 slot: u32, 153 offset: usize, 154 size: usize, 155 fd: &dyn AsRawDescriptor, 156 fd_offset: u64, 157 prot: Protection, 158 ) -> Result<()>; 159 160 /// Remove `size`-byte mapping starting at `offset`. remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>161 fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>; 162 } 163 164 /// A unique fingerprint for a particular `VcpuRunHandle`, used in `Vcpu` impls to ensure the 165 /// `VcpuRunHandle ` they receive is the same one that was returned from `take_run_handle`. 166 #[derive(Clone, PartialEq, Eq)] 167 pub struct VcpuRunHandleFingerprint(u64); 168 169 impl VcpuRunHandleFingerprint { as_u64(&self) -> u64170 pub fn as_u64(&self) -> u64 { 171 self.0 172 } 173 } 174 175 /// A handle returned by a `Vcpu` to be used with `Vcpu::run` to execute a virtual machine's VCPU. 176 /// 177 /// This is used to ensure that the caller has bound the `Vcpu` to a thread with 178 /// `Vcpu::take_run_handle` and to execute hypervisor specific cleanup routines when dropped. 179 pub struct VcpuRunHandle { 180 drop_fn: fn(), 181 fingerprint: VcpuRunHandleFingerprint, 182 // Prevents Send+Sync for this type. 183 phantom: std::marker::PhantomData<*mut ()>, 184 } 185 186 impl VcpuRunHandle { 187 /// Used by `Vcpu` impls to create a unique run handle, that when dropped, will call the given 188 /// `drop_fn`. new(drop_fn: fn()) -> Self189 pub fn new(drop_fn: fn()) -> Self { 190 // Creates a probably unique number with a hash of the current thread id and epoch time. 191 use std::hash::{Hash, Hasher}; 192 let mut hasher = std::collections::hash_map::DefaultHasher::new(); 193 std::time::Instant::now().hash(&mut hasher); 194 std::thread::current().id().hash(&mut hasher); 195 Self { 196 drop_fn, 197 fingerprint: VcpuRunHandleFingerprint(hasher.finish()), 198 phantom: std::marker::PhantomData, 199 } 200 } 201 202 /// Gets the unique fingerprint which may be copied and compared freely. fingerprint(&self) -> &VcpuRunHandleFingerprint203 pub fn fingerprint(&self) -> &VcpuRunHandleFingerprint { 204 &self.fingerprint 205 } 206 } 207 208 impl Drop for VcpuRunHandle { drop(&mut self)209 fn drop(&mut self) { 210 (self.drop_fn)(); 211 } 212 } 213 214 /// A virtual CPU holding a virtualized hardware thread's state, such as registers and interrupt 215 /// state, which may be used to execute virtual machines. 216 /// 217 /// To run, `take_run_handle` must be called to lock the vcpu to a thread. Then the returned 218 /// `VcpuRunHandle` can be used for running. 219 pub trait Vcpu: downcast_rs::DowncastSync { 220 /// Makes a shallow clone of this `Vcpu`. try_clone(&self) -> Result<Self> where Self: Sized221 fn try_clone(&self) -> Result<Self> 222 where 223 Self: Sized; 224 225 /// Casts this architecture specific trait object to the base trait object `Vcpu`. as_vcpu(&self) -> &dyn Vcpu226 fn as_vcpu(&self) -> &dyn Vcpu; 227 228 /// Returns a unique `VcpuRunHandle`. A `VcpuRunHandle` is required to run the guest. 229 /// 230 /// Assigns a vcpu to the current thread so that signal handlers can call 231 /// set_local_immediate_exit(). An optional signal number will be temporarily blocked while 232 /// assigning the vcpu to the thread and later blocked when `VcpuRunHandle` is destroyed. 233 /// 234 /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu. take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>235 fn take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>; 236 237 /// Runs the VCPU until it exits, returning the reason for the exit. 238 /// 239 /// Note that the state of the VCPU and associated VM must be setup first for this to do 240 /// anything useful. The given `run_handle` must be the same as the one returned by 241 /// `take_run_handle` for this `Vcpu`. run(&self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>242 fn run(&self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>; 243 244 /// Returns the vcpu id. id(&self) -> usize245 fn id(&self) -> usize; 246 247 /// Sets the bit that requests an immediate exit. set_immediate_exit(&self, exit: bool)248 fn set_immediate_exit(&self, exit: bool); 249 250 /// Sets/clears the bit for immediate exit for the vcpu on the current thread. set_local_immediate_exit(exit: bool) where Self: Sized251 fn set_local_immediate_exit(exit: bool) 252 where 253 Self: Sized; 254 255 /// Returns a function pointer that invokes `set_local_immediate_exit` in a 256 /// signal-safe way when called. set_local_immediate_exit_fn(&self) -> extern "C" fn()257 fn set_local_immediate_exit_fn(&self) -> extern "C" fn(); 258 259 /// Sets the data received by a mmio read, ioport in, or hypercall instruction. 260 /// 261 /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`, 262 /// `VcpuExit::MmioRead`, or 'VcpuExit::HypervHcall`. set_data(&self, data: &[u8]) -> Result<()>263 fn set_data(&self, data: &[u8]) -> Result<()>; 264 265 /// Signals to the hypervisor that this guest is being paused by userspace. Only works on Vms 266 /// that support `VmCap::PvClockSuspend`. pvclock_ctrl(&self) -> Result<()>267 fn pvclock_ctrl(&self) -> Result<()>; 268 269 /// Specifies set of signals that are blocked during execution of `RunnableVcpu::run`. Signals 270 /// that are not blocked will cause run to return with `VcpuExit::Intr`. Only works on Vms that 271 /// support `VmCap::SignalMask`. set_signal_mask(&self, signals: &[c_int]) -> Result<()>272 fn set_signal_mask(&self, signals: &[c_int]) -> Result<()>; 273 274 /// Enables a hypervisor-specific extension on this Vcpu. `cap` is a constant defined by the 275 /// hypervisor API (e.g., kvm.h). `args` are the arguments for enabling the feature, if any. 276 /// 277 /// # Safety 278 /// This function is marked as unsafe because `args` may be interpreted as pointers for some 279 /// capabilities. The caller must ensure that any pointers passed in the `args` array are 280 /// allocated as the kernel expects, and that mutable pointers are owned. enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>281 unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>; 282 } 283 284 downcast_rs::impl_downcast!(sync Vcpu); 285 286 /// An address either in programmable I/O space or in memory mapped I/O space. 287 #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, std::hash::Hash)] 288 pub enum IoEventAddress { 289 Pio(u64), 290 Mmio(u64), 291 } 292 293 /// Used in `Vm::register_ioevent` to indicate a size and optionally value to match. 294 #[derive(PartialEq, Eq)] 295 pub enum Datamatch { 296 AnyLength, 297 U8(Option<u8>), 298 U16(Option<u16>), 299 U32(Option<u32>), 300 U64(Option<u64>), 301 } 302 303 /// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called. 304 #[derive(Debug)] 305 pub enum VcpuExit { 306 /// An out port instruction was run on the given port with the given data. 307 IoOut { 308 port: u16, 309 size: usize, 310 data: [u8; 8], 311 }, 312 /// An in port instruction was run on the given port. 313 /// 314 /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is 315 /// called again. 316 IoIn { 317 port: u16, 318 size: usize, 319 }, 320 /// A read instruction was run against the given MMIO address. 321 /// 322 /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is 323 /// called again. 324 MmioRead { 325 address: u64, 326 size: usize, 327 }, 328 /// A write instruction was run against the given MMIO address with the given data. 329 MmioWrite { 330 address: u64, 331 size: usize, 332 data: [u8; 8], 333 }, 334 IoapicEoi { 335 vector: u8, 336 }, 337 HypervSynic { 338 msr: u32, 339 control: u64, 340 evt_page: u64, 341 msg_page: u64, 342 }, 343 HypervHcall { 344 input: u64, 345 params: [u64; 2], 346 }, 347 Unknown, 348 Exception, 349 Hypercall, 350 Debug, 351 Hlt, 352 IrqWindowOpen, 353 Shutdown, 354 FailEntry { 355 hardware_entry_failure_reason: u64, 356 }, 357 Intr, 358 SetTpr, 359 TprAccess, 360 S390Sieic, 361 S390Reset, 362 Dcr, 363 Nmi, 364 InternalError, 365 Osi, 366 PaprHcall, 367 S390Ucontrol, 368 Watchdog, 369 S390Tsch, 370 Epr, 371 SystemEventShutdown, 372 SystemEventReset, 373 SystemEventCrash, 374 SystemEventS2Idle, 375 RdMsr { 376 index: u32, 377 }, 378 WrMsr { 379 index: u32, 380 data: u64, 381 }, 382 } 383 384 /// A device type to create with `Vm.create_device`. 385 #[derive(Clone, Copy, Debug, PartialEq)] 386 pub enum DeviceKind { 387 /// VFIO device for direct access to devices from userspace 388 Vfio, 389 /// ARM virtual general interrupt controller v2 390 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 391 ArmVgicV2, 392 /// ARM virtual general interrupt controller v3 393 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 394 ArmVgicV3, 395 } 396 397 /// The source chip of an `IrqSource` 398 #[repr(C)] 399 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 400 pub enum IrqSourceChip { 401 PicPrimary, 402 PicSecondary, 403 Ioapic, 404 Gic, 405 } 406 407 /// A source of IRQs in an `IrqRoute`. 408 #[repr(C)] 409 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 410 pub enum IrqSource { 411 Irqchip { chip: IrqSourceChip, pin: u32 }, 412 Msi { address: u64, data: u32 }, 413 } 414 415 /// A single route for an IRQ. 416 #[repr(C)] 417 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 418 pub struct IrqRoute { 419 pub gsi: u32, 420 pub source: IrqSource, 421 } 422 423 /// The state of the paravirtual clock. 424 #[derive(Debug, Default, Copy, Clone)] 425 pub struct ClockState { 426 /// Current pv clock timestamp, as seen by the guest 427 pub clock: u64, 428 /// Hypervisor-specific feature flags for the pv clock 429 pub flags: u32, 430 } 431 432 /// The MPState represents the state of a processor. 433 #[repr(C)] 434 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 435 pub enum MPState { 436 /// the vcpu is currently running (x86/x86_64,arm/arm64) 437 Runnable, 438 /// the vcpu is an application processor (AP) which has not yet received an INIT signal 439 /// (x86/x86_64) 440 Uninitialized, 441 /// the vcpu has received an INIT signal, and is now ready for a SIPI (x86/x86_64) 442 InitReceived, 443 /// the vcpu has executed a HLT instruction and is waiting for an interrupt (x86/x86_64) 444 Halted, 445 /// the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) (x86/x86_64) 446 SipiReceived, 447 /// the vcpu is stopped (arm/arm64) 448 Stopped, 449 } 450 451 /// Whether the VM should be run in protected mode or not. 452 #[derive(Copy, Clone, Debug, Eq, PartialEq)] 453 pub enum ProtectionType { 454 /// The VM should be run in the unprotected mode, where the host has access to its memory. 455 Unprotected, 456 /// The VM should be run in protected mode, so the host cannot access its memory directly. It 457 /// should be booted via the protected VM firmware, so that it can access its secrets. 458 Protected, 459 /// The VM should be run in protected mode, but booted directly without pVM firmware. The host 460 /// will still be unable to access the VM memory, but it won't be given any secrets. 461 ProtectedWithoutFirmware, 462 } 463