• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! A crate for abstracting the underlying kernel hypervisor used in crosvm.
6 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
7 pub mod aarch64;
8 pub mod caps;
9 pub mod kvm;
10 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
11 pub mod x86_64;
12 
13 use std::os::raw::c_int;
14 
15 use serde::{Deserialize, Serialize};
16 
17 use base::{AsRawDescriptor, Event, MappedRegion, Protection, Result, SafeDescriptor};
18 use vm_memory::{GuestAddress, GuestMemory};
19 
20 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
21 pub use crate::aarch64::*;
22 pub use crate::caps::*;
23 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
24 pub use crate::x86_64::*;
25 
/// An index in the list of guest-mapped memory regions.
pub type MemSlot = u32;
28 
29 /// A trait for checking hypervisor capabilities.
30 pub trait Hypervisor: Send {
31     /// Makes a shallow clone of this `Hypervisor`.
try_clone(&self) -> Result<Self> where Self: Sized32     fn try_clone(&self) -> Result<Self>
33     where
34         Self: Sized;
35 
36     /// Checks if a particular `HypervisorCap` is available.
check_capability(&self, cap: HypervisorCap) -> bool37     fn check_capability(&self, cap: HypervisorCap) -> bool;
38 }
39 
40 /// A wrapper for using a VM and getting/setting its state.
41 pub trait Vm: Send {
42     /// Makes a shallow clone of this `Vm`.
try_clone(&self) -> Result<Self> where Self: Sized43     fn try_clone(&self) -> Result<Self>
44     where
45         Self: Sized;
46 
47     /// Checks if a particular `VmCap` is available.
48     ///
49     /// This is distinct from the `Hypervisor` version of this method because some extensions depend
50     /// on the particular `Vm` instance. This method is encouraged because it more accurately
51     /// reflects the usable capabilities.
check_capability(&self, c: VmCap) -> bool52     fn check_capability(&self, c: VmCap) -> bool;
53 
54     /// Get the guest physical address size in bits.
get_guest_phys_addr_bits(&self) -> u855     fn get_guest_phys_addr_bits(&self) -> u8;
56 
57     /// Gets the guest-mapped memory for the Vm.
get_memory(&self) -> &GuestMemory58     fn get_memory(&self) -> &GuestMemory;
59 
60     /// Inserts the given `MappedRegion` into the VM's address space at `guest_addr`.
61     ///
62     /// The slot that was assigned the memory mapping is returned on success.  The slot can be given
63     /// to `Vm::remove_memory_region` to remove the memory from the VM's address space and take back
64     /// ownership of `mem_region`.
65     ///
66     /// Note that memory inserted into the VM's address space must not overlap with any other memory
67     /// slot's region.
68     ///
69     /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
70     /// write will trigger a mmio VM exit, leaving the memory untouched.
71     ///
72     /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
73     /// by the guest with `get_dirty_log`.
add_memory_region( &mut self, guest_addr: GuestAddress, mem_region: Box<dyn MappedRegion>, read_only: bool, log_dirty_pages: bool, ) -> Result<MemSlot>74     fn add_memory_region(
75         &mut self,
76         guest_addr: GuestAddress,
77         mem_region: Box<dyn MappedRegion>,
78         read_only: bool,
79         log_dirty_pages: bool,
80     ) -> Result<MemSlot>;
81 
82     /// Does a synchronous msync of the memory mapped at `slot`, syncing `size` bytes starting at
83     /// `offset` from the start of the region.  `offset` must be page aligned.
msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>84     fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>;
85 
86     /// Removes and drops the `UserMemoryRegion` that was previously added at the given slot.
remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>87     fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>;
88 
89     /// Creates an emulated device.
create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>90     fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>;
91 
92     /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
93     /// `slot`.  Only works on VMs that support `VmCap::DirtyLog`.
94     ///
95     /// The size of `dirty_log` must be at least as many bits as there are pages in the memory
96     /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
97     /// be 2 bytes or greater.
get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>98     fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>;
99 
100     /// Registers an event to be signaled whenever a certain address is written to.
101     ///
102     /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
103     /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
104     /// and must match the expected size of the guest's write.
105     ///
106     /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
107     /// triggered is prevented.
register_ioevent( &mut self, evt: &Event, addr: IoEventAddress, datamatch: Datamatch, ) -> Result<()>108     fn register_ioevent(
109         &mut self,
110         evt: &Event,
111         addr: IoEventAddress,
112         datamatch: Datamatch,
113     ) -> Result<()>;
114 
115     /// Unregisters an event previously registered with `register_ioevent`.
116     ///
117     /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
118     /// `register_ioevent`.
unregister_ioevent( &mut self, evt: &Event, addr: IoEventAddress, datamatch: Datamatch, ) -> Result<()>119     fn unregister_ioevent(
120         &mut self,
121         evt: &Event,
122         addr: IoEventAddress,
123         datamatch: Datamatch,
124     ) -> Result<()>;
125 
126     /// Trigger any matching registered io events based on an MMIO or PIO write at `addr`. The
127     /// `data` slice represents the contents and length of the write, which is used to compare with
128     /// the registered io events' Datamatch values. If the hypervisor does in-kernel IO event
129     /// delivery, this is a no-op.
handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>130     fn handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>;
131 
132     /// Retrieves the current timestamp of the paravirtual clock as seen by the current guest.
133     /// Only works on VMs that support `VmCap::PvClock`.
get_pvclock(&self) -> Result<ClockState>134     fn get_pvclock(&self) -> Result<ClockState>;
135 
136     /// Sets the current timestamp of the paravirtual clock as seen by the current guest.
137     /// Only works on VMs that support `VmCap::PvClock`.
set_pvclock(&self, state: &ClockState) -> Result<()>138     fn set_pvclock(&self, state: &ClockState) -> Result<()>;
139 
140     /// Maps `size` bytes starting at `fs_offset` bytes from within the given `fd`
141     /// at `offset` bytes from the start of the arena with `prot` protections.
142     /// `offset` must be page aligned.
143     ///
144     /// # Arguments
145     /// * `offset` - Page aligned offset into the arena in bytes.
146     /// * `size` - Size of memory region in bytes.
147     /// * `fd` - File descriptor to mmap from.
148     /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
149     /// * `prot` - Protection (e.g. readable/writable) of the memory region.
add_fd_mapping( &mut self, slot: u32, offset: usize, size: usize, fd: &dyn AsRawDescriptor, fd_offset: u64, prot: Protection, ) -> Result<()>150     fn add_fd_mapping(
151         &mut self,
152         slot: u32,
153         offset: usize,
154         size: usize,
155         fd: &dyn AsRawDescriptor,
156         fd_offset: u64,
157         prot: Protection,
158     ) -> Result<()>;
159 
160     /// Remove `size`-byte mapping starting at `offset`.
remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>161     fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>;
162 }
163 
/// A unique fingerprint for a particular `VcpuRunHandle`, used in `Vcpu` impls to ensure the
/// `VcpuRunHandle` they receive is the same one that was returned from `take_run_handle`.
///
/// The fingerprint is a plain 64-bit value, so it is cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VcpuRunHandleFingerprint(u64);

impl VcpuRunHandleFingerprint {
    /// Returns the raw 64-bit value of this fingerprint.
    pub fn as_u64(&self) -> u64 {
        self.0
    }
}

/// A handle returned by a `Vcpu` to be used with `Vcpu::run` to execute a virtual machine's VCPU.
///
/// This is used to ensure that the caller has bound the `Vcpu` to a thread with
/// `Vcpu::take_run_handle` and to execute hypervisor specific cleanup routines when dropped.
pub struct VcpuRunHandle {
    // Cleanup routine called from `Drop::drop`.
    drop_fn: fn(),
    // Value handed out by `fingerprint()` so `Vcpu` impls can recognise this handle.
    fingerprint: VcpuRunHandleFingerprint,
    // Prevents Send+Sync for this type.
    phantom: std::marker::PhantomData<*mut ()>,
}

impl VcpuRunHandle {
    /// Used by `Vcpu` impls to create a unique run handle, that when dropped, will call the given
    /// `drop_fn`.
    pub fn new(drop_fn: fn()) -> Self {
        // Creates a probably unique number from a hash of the current monotonic timestamp
        // (`Instant`, not wall-clock/epoch time) and the current thread id.
        use std::hash::{Hash, Hasher};
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        std::time::Instant::now().hash(&mut hasher);
        std::thread::current().id().hash(&mut hasher);
        Self {
            drop_fn,
            fingerprint: VcpuRunHandleFingerprint(hasher.finish()),
            phantom: std::marker::PhantomData,
        }
    }

    /// Gets the unique fingerprint which may be copied and compared freely.
    pub fn fingerprint(&self) -> &VcpuRunHandleFingerprint {
        &self.fingerprint
    }
}

impl Drop for VcpuRunHandle {
    /// Runs the cleanup routine supplied to `VcpuRunHandle::new`.
    fn drop(&mut self) {
        (self.drop_fn)();
    }
}
213 
214 /// A virtual CPU holding a virtualized hardware thread's state, such as registers and interrupt
215 /// state, which may be used to execute virtual machines.
216 ///
217 /// To run, `take_run_handle` must be called to lock the vcpu to a thread. Then the returned
218 /// `VcpuRunHandle` can be used for running.
219 pub trait Vcpu: downcast_rs::DowncastSync {
220     /// Makes a shallow clone of this `Vcpu`.
try_clone(&self) -> Result<Self> where Self: Sized221     fn try_clone(&self) -> Result<Self>
222     where
223         Self: Sized;
224 
225     /// Casts this architecture specific trait object to the base trait object `Vcpu`.
as_vcpu(&self) -> &dyn Vcpu226     fn as_vcpu(&self) -> &dyn Vcpu;
227 
228     /// Returns a unique `VcpuRunHandle`. A `VcpuRunHandle` is required to run the guest.
229     ///
230     /// Assigns a vcpu to the current thread so that signal handlers can call
231     /// set_local_immediate_exit().  An optional signal number will be temporarily blocked while
232     /// assigning the vcpu to the thread and later blocked when `VcpuRunHandle` is destroyed.
233     ///
234     /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>235     fn take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>;
236 
237     /// Runs the VCPU until it exits, returning the reason for the exit.
238     ///
239     /// Note that the state of the VCPU and associated VM must be setup first for this to do
240     /// anything useful. The given `run_handle` must be the same as the one returned by
241     /// `take_run_handle` for this `Vcpu`.
run(&self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>242     fn run(&self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>;
243 
244     /// Returns the vcpu id.
id(&self) -> usize245     fn id(&self) -> usize;
246 
247     /// Sets the bit that requests an immediate exit.
set_immediate_exit(&self, exit: bool)248     fn set_immediate_exit(&self, exit: bool);
249 
250     /// Sets/clears the bit for immediate exit for the vcpu on the current thread.
set_local_immediate_exit(exit: bool) where Self: Sized251     fn set_local_immediate_exit(exit: bool)
252     where
253         Self: Sized;
254 
255     /// Returns a function pointer that invokes `set_local_immediate_exit` in a
256     /// signal-safe way when called.
set_local_immediate_exit_fn(&self) -> extern "C" fn()257     fn set_local_immediate_exit_fn(&self) -> extern "C" fn();
258 
259     /// Sets the data received by a mmio read, ioport in, or hypercall instruction.
260     ///
261     /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`,
262     /// `VcpuExit::MmioRead`, or 'VcpuExit::HypervHcall`.
set_data(&self, data: &[u8]) -> Result<()>263     fn set_data(&self, data: &[u8]) -> Result<()>;
264 
265     /// Signals to the hypervisor that this guest is being paused by userspace.  Only works on Vms
266     /// that support `VmCap::PvClockSuspend`.
pvclock_ctrl(&self) -> Result<()>267     fn pvclock_ctrl(&self) -> Result<()>;
268 
269     /// Specifies set of signals that are blocked during execution of `RunnableVcpu::run`.  Signals
270     /// that are not blocked will cause run to return with `VcpuExit::Intr`.  Only works on Vms that
271     /// support `VmCap::SignalMask`.
set_signal_mask(&self, signals: &[c_int]) -> Result<()>272     fn set_signal_mask(&self, signals: &[c_int]) -> Result<()>;
273 
274     /// Enables a hypervisor-specific extension on this Vcpu.  `cap` is a constant defined by the
275     /// hypervisor API (e.g., kvm.h).  `args` are the arguments for enabling the feature, if any.
276     ///
277     /// # Safety
278     /// This function is marked as unsafe because `args` may be interpreted as pointers for some
279     /// capabilities. The caller must ensure that any pointers passed in the `args` array are
280     /// allocated as the kernel expects, and that mutable pointers are owned.
enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>281     unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>;
282 }
283 
284 downcast_rs::impl_downcast!(sync Vcpu);
285 
286 /// An address either in programmable I/O space or in memory mapped I/O space.
287 #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, std::hash::Hash)]
288 pub enum IoEventAddress {
289     Pio(u64),
290     Mmio(u64),
291 }
292 
/// Used in `Vm::register_ioevent` to indicate a size and optionally value to match.
///
/// For the sized variants, the variant fixes the width of the write and the `Option` carries the
/// value to match, if any.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Datamatch {
    /// No particular write size is specified.
    AnyLength,
    /// A `u8`-sized write, optionally with a value to match.
    U8(Option<u8>),
    /// A `u16`-sized write, optionally with a value to match.
    U16(Option<u16>),
    /// A `u32`-sized write, optionally with a value to match.
    U32(Option<u32>),
    /// A `u64`-sized write, optionally with a value to match.
    U64(Option<u64>),
}
302 
/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
#[derive(Debug)]
pub enum VcpuExit {
    /// An out port instruction was run on the given port with the given data.
    IoOut {
        port: u16,
        size: usize,
        data: [u8; 8],
    },
    /// An in port instruction was run on the given port.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    IoIn {
        port: u16,
        size: usize,
    },
    /// A read instruction was run against the given MMIO address.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    MmioRead {
        address: u64,
        size: usize,
    },
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite {
        address: u64,
        size: usize,
        data: [u8; 8],
    },
    IoapicEoi {
        vector: u8,
    },
    HypervSynic {
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
    },
    HypervHcall {
        input: u64,
        params: [u64; 2],
    },
    Unknown,
    Exception,
    Hypercall,
    Debug,
    Hlt,
    IrqWindowOpen,
    Shutdown,
    FailEntry {
        hardware_entry_failure_reason: u64,
    },
    Intr,
    SetTpr,
    TprAccess,
    S390Sieic,
    S390Reset,
    Dcr,
    Nmi,
    InternalError,
    Osi,
    PaprHcall,
    S390Ucontrol,
    Watchdog,
    S390Tsch,
    Epr,
    SystemEventShutdown,
    SystemEventReset,
    SystemEventCrash,
    SystemEventS2Idle,
    RdMsr {
        index: u32,
    },
    WrMsr {
        index: u32,
        data: u64,
    },
}
383 
/// A device type to create with `Vm.create_device`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DeviceKind {
    /// VFIO device for direct access to devices from userspace
    Vfio,
    /// ARM virtual general interrupt controller v2
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    ArmVgicV2,
    /// ARM virtual general interrupt controller v3
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    ArmVgicV3,
}
396 
/// The source chip of an `IrqSource`
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IrqSourceChip {
    PicPrimary,
    PicSecondary,
    Ioapic,
    Gic,
}

/// A source of IRQs in an `IrqRoute`.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IrqSource {
    /// An interrupt identified by a pin on an interrupt controller chip.
    Irqchip { chip: IrqSourceChip, pin: u32 },
    /// A message-signaled interrupt described by an address/data pair.
    Msi { address: u64, data: u32 },
}

/// A single route for an IRQ.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct IrqRoute {
    pub gsi: u32,
    pub source: IrqSource,
}
422 
/// The state of the paravirtual clock.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
pub struct ClockState {
    /// Current pv clock timestamp, as seen by the guest
    pub clock: u64,
    /// Hypervisor-specific feature flags for the pv clock
    pub flags: u32,
}
431 
/// The MPState represents the state of a processor.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MPState {
    /// The vcpu is currently running (x86/x86_64, arm/arm64).
    Runnable,
    /// The vcpu is an application processor (AP) which has not yet received an INIT signal
    /// (x86/x86_64).
    Uninitialized,
    /// The vcpu has received an INIT signal, and is now ready for a SIPI (x86/x86_64).
    InitReceived,
    /// The vcpu has executed a HLT instruction and is waiting for an interrupt (x86/x86_64).
    Halted,
    /// The vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS)
    /// (x86/x86_64).
    SipiReceived,
    /// The vcpu is stopped (arm/arm64).
    Stopped,
}
450 
/// Whether the VM should be run in protected mode or not.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ProtectionType {
    /// The VM should be run in the unprotected mode, where the host has access to its memory.
    Unprotected,
    /// The VM should be run in protected mode, so the host cannot access its memory directly. It
    /// should be booted via the protected VM firmware, so that it can access its secrets.
    Protected,
    /// The VM should be run in protected mode, but booted directly without pVM firmware. The host
    /// will still be unable to access the VM memory, but it won't be given any secrets.
    ProtectedWithoutFirmware,
}
463