// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::arch::x86_64::CpuidResult;
#[cfg(any(unix, feature = "haxm", feature = "whpx"))]
use std::arch::x86_64::__cpuid;
use std::arch::x86_64::_rdtsc;
use std::collections::BTreeMap;
use std::collections::HashSet;

use anyhow::Context;
use base::custom_serde::deserialize_seq_to_arr;
use base::custom_serde::serialize_arr;
use base::error;
use base::warn;
use base::Result;
use bit_field::*;
use downcast_rs::impl_downcast;
use libc::c_void;
use serde::Deserialize;
use serde::Serialize;
use vm_memory::GuestAddress;

use crate::Hypervisor;
use crate::IrqRoute;
use crate::IrqSource;
use crate::IrqSourceChip;
use crate::Vcpu;
use crate::Vm;

const MSR_F15H_PERF_CTL0: u32 = 0xc0010200;
const MSR_F15H_PERF_CTL1: u32 = 0xc0010202;
const MSR_F15H_PERF_CTL2: u32 = 0xc0010204;
const MSR_F15H_PERF_CTL3: u32 = 0xc0010206;
const MSR_F15H_PERF_CTL4: u32 = 0xc0010208;
const MSR_F15H_PERF_CTL5: u32 = 0xc001020a;
const MSR_F15H_PERF_CTR0: u32 = 0xc0010201;
const MSR_F15H_PERF_CTR1: u32 = 0xc0010203;
const MSR_F15H_PERF_CTR2: u32 = 0xc0010205;
const MSR_F15H_PERF_CTR3: u32 = 0xc0010207;
const MSR_F15H_PERF_CTR4: u32 = 0xc0010209;
const MSR_F15H_PERF_CTR5: u32 = 0xc001020b;
const MSR_IA32_PERF_CAPABILITIES: u32 = 0x00000345;

/// A trait for managing cpuids for an x86_64 hypervisor and for checking its capabilities.
pub trait HypervisorX86_64: Hypervisor {
    /// Get the system supported CPUID values.
    fn get_supported_cpuid(&self) -> Result<CpuId>;

    /// Get the system emulated CPUID values.
    fn get_emulated_cpuid(&self) -> Result<CpuId>;

    /// Gets the list of supported MSRs.
    fn get_msr_index_list(&self) -> Result<Vec<u32>>;
}

/// A wrapper for using a VM on x86_64 and getting/setting its state.
pub trait VmX86_64: Vm {
    /// Gets the `HypervisorX86_64` that created this VM.
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64;

    /// Create a Vcpu with the specified Vcpu ID.
    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>;

    /// Sets the address of the three-page region in the VM's address space.
    fn set_tss_addr(&self, addr: GuestAddress) -> Result<()>;

    /// Sets the address of a one-page region in the VM's address space.
    fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>;
}

/// A wrapper around creating and using a VCPU on x86_64.
pub trait VcpuX86_64: Vcpu {
    /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
    /// interrupts into the guest.
    fn set_interrupt_window_requested(&self, requested: bool);

    /// Checks if we can inject an interrupt into the VCPU.
    fn ready_for_interrupt(&self) -> bool;

    /// Injects interrupt vector `irq` into the VCPU.
    fn interrupt(&self, irq: u32) -> Result<()>;

    /// Injects a non-maskable interrupt into the VCPU.
    fn inject_nmi(&self) -> Result<()>;

    /// Gets the VCPU general purpose registers.
    fn get_regs(&self) -> Result<Regs>;

    /// Sets the VCPU general purpose registers.
    fn set_regs(&self, regs: &Regs) -> Result<()>;

    /// Gets the VCPU special registers.
    fn get_sregs(&self) -> Result<Sregs>;

    /// Sets the VCPU special registers.
    fn set_sregs(&self, sregs: &Sregs) -> Result<()>;

    /// Gets the VCPU FPU registers.
    fn get_fpu(&self) -> Result<Fpu>;

    /// Sets the VCPU FPU registers.
    fn set_fpu(&self, fpu: &Fpu) -> Result<()>;

    /// Gets the VCPU debug registers.
    fn get_debugregs(&self) -> Result<DebugRegs>;

    /// Sets the VCPU debug registers.
    fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>;

    /// Gets the VCPU extended control registers.
    fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>>;

    /// Sets a VCPU extended control register.
    fn set_xcr(&self, xcr: u32, value: u64) -> Result<()>;

    /// Gets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.
    fn get_xsave(&self) -> Result<Xsave>;

    /// Sets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.
    fn set_xsave(&self, xsave: &Xsave) -> Result<()>;

    /// Gets interrupt state (hypervisor specific) for this VCPU that must be
    /// saved/restored for snapshotting.
    fn get_interrupt_state(&self) -> Result<serde_json::Value>;

    /// Sets interrupt state (hypervisor specific) for this VCPU. Only used for
    /// snapshotting.
    fn set_interrupt_state(&self, data: serde_json::Value) -> Result<()>;

    /// Gets a single model-specific register's value.
    fn get_msr(&self, msr_index: u32) -> Result<u64>;

    /// Gets the model-specific registers. Returns all the MSRs for the VCPU.
    fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>>;

    /// Sets a single model-specific register's value.
    fn set_msr(&self, msr_index: u32, value: u64) -> Result<()>;

    /// Sets up the data returned by the CPUID instruction.
    fn set_cpuid(&self, cpuid: &CpuId) -> Result<()>;

    /// Gets the system emulated hyper-v CPUID values.
    fn get_hyperv_cpuid(&self) -> Result<CpuId>;

    /// Sets up debug registers and configures the vcpu for handling guest debug events.
    fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>;

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
    /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
    /// will then set the appropriate registers on the vcpu.
    fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()>;
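
    // Illustrative only: a rough sketch of the caller side described above. The exact shape of
    // the `VcpuExit::Cpuid` variant and the `vcpu` binding are assumptions made for this sketch,
    // not something this trait defines.
    //
    //     match vcpu.run()? {
    //         VcpuExit::Cpuid { mut entry } => {
    //             // Fill `entry.cpuid` with the emulated leaf values, then forward it so the
    //             // guest's registers are updated before it resumes.
    //             vcpu.handle_cpuid(&entry)?;
    //         }
    //         _ => { /* other exit reasons */ }
    //     }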

    /// Gets the guest->host TSC offset.
    ///
    /// The default implementation uses [`VcpuX86_64::get_msr()`] to read the guest TSC.
    fn get_tsc_offset(&self) -> Result<u64> {
        // SAFETY:
        // Safe because _rdtsc takes no arguments
        let host_before_tsc = unsafe { _rdtsc() };

        // get guest TSC value from our hypervisor
        let guest_tsc = self.get_msr(crate::MSR_IA32_TSC)?;

        // SAFETY:
        // Safe because _rdtsc takes no arguments
        let host_after_tsc = unsafe { _rdtsc() };

        // Average the before and after host tsc to get the best value
        let host_tsc = ((host_before_tsc as u128 + host_after_tsc as u128) / 2) as u64;

        Ok(guest_tsc.wrapping_sub(host_tsc))
    }

    /// Sets the guest->host TSC offset.
    ///
    /// The default implementation uses [`VcpuX86_64::set_tsc_value()`] to set the TSC value.
    ///
    /// It sets TSC_OFFSET (VMCS / CB field) by setting the TSC MSR to the current
    /// host TSC value plus the desired offset. We rely on the fact that hypervisors
    /// determine the value of TSC_OFFSET by computing TSC_OFFSET = new_tsc_value
    /// - _rdtsc() = _rdtsc() + offset - _rdtsc() ~= offset. Note that the ~= is
    /// important: this is an approximate operation, because the two _rdtsc() calls
    /// are separated by at least a few ticks.
    ///
    /// Note: TSC_OFFSET, host TSC, guest TSC, and TSC MSR are all different
    /// concepts.
    /// * When a guest executes rdtsc, the value (guest TSC) returned is host_tsc * TSC_MULTIPLIER +
    ///   TSC_OFFSET + TSC_ADJUST.
    /// * The TSC MSR is a special MSR that when written to by the host, will cause TSC_OFFSET to be
    ///   set accordingly by the hypervisor.
    /// * When the guest *writes* to TSC MSR, it actually changes the TSC_ADJUST MSR *for the
    ///   guest*. Generally this only happens if the guest is trying to re-zero or synchronize
    ///   TSCs.
    fn set_tsc_offset(&self, offset: u64) -> Result<()> {
        // SAFETY: _rdtsc takes no arguments.
        let host_tsc = unsafe { _rdtsc() };
        self.set_tsc_value(host_tsc.wrapping_add(offset))
    }
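
    // A worked example of the approximation described above (illustrative numbers only),
    // assuming TSC_MULTIPLIER == 1 and TSC_ADJUST == 0:
    //
    //     desired offset     = 1_000_000
    //     host _rdtsc() here = 5_000_000  -> set_tsc_value(6_000_000)
    //
    // The hypervisor then computes TSC_OFFSET = 6_000_000 - _rdtsc() ~= 1_000_000, so a guest
    // rdtsc afterwards reads roughly host_tsc + 1_000_000.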

    /// Sets the guest TSC exactly to the provided value.
    ///
    /// The default implementation sets the guest's TSC by writing the value to the MSR directly.
    ///
    /// See [`VcpuX86_64::set_tsc_offset()`] for an explanation of how this value is actually read
    /// by the guest after being set.
    fn set_tsc_value(&self, value: u64) -> Result<()> {
        self.set_msr(crate::MSR_IA32_TSC, value)
    }

    /// Some hypervisors require special handling to restore timekeeping when
    /// a snapshot is restored. They are provided with a host TSC reference
    /// moment, guaranteed to be the same across all Vcpus, and the Vcpu's TSC
    /// offset at the moment it was snapshotted.
    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()>;

    /// Snapshot vCPU state
    fn snapshot(&self) -> anyhow::Result<VcpuSnapshot> {
        Ok(VcpuSnapshot {
            vcpu_id: self.id(),
            regs: self.get_regs()?,
            sregs: self.get_sregs()?,
            debug_regs: self.get_debugregs()?,
            xcrs: self.get_xcrs()?,
            msrs: self.get_all_msrs()?,
            xsave: self.get_xsave()?,
            hypervisor_data: self.get_interrupt_state()?,
            tsc_offset: self.get_tsc_offset()?,
        })
    }

    fn restore(
        &mut self,
        snapshot: &VcpuSnapshot,
        host_tsc_reference_moment: u64,
    ) -> anyhow::Result<()> {
        // List of MSRs that may fail to restore due to lack of support in the host kernel.
        // Some hosts may be running older kernels which do not support all MSRs, but
        // get_all_msrs will still fetch the MSRs supported by the CPU. Trying to set those MSRs
        // will result in failures, so they only produce a warning instead of an error.
        let msr_allowlist = HashSet::from([
            MSR_F15H_PERF_CTL0,
            MSR_F15H_PERF_CTL1,
            MSR_F15H_PERF_CTL2,
            MSR_F15H_PERF_CTL3,
            MSR_F15H_PERF_CTL4,
            MSR_F15H_PERF_CTL5,
            MSR_F15H_PERF_CTR0,
            MSR_F15H_PERF_CTR1,
            MSR_F15H_PERF_CTR2,
            MSR_F15H_PERF_CTR3,
            MSR_F15H_PERF_CTR4,
            MSR_F15H_PERF_CTR5,
            MSR_IA32_PERF_CAPABILITIES,
        ]);
        assert_eq!(snapshot.vcpu_id, self.id());
        self.set_regs(&snapshot.regs)?;
        self.set_sregs(&snapshot.sregs)?;
        self.set_debugregs(&snapshot.debug_regs)?;
        for (xcr_index, value) in &snapshot.xcrs {
            self.set_xcr(*xcr_index, *value)?;
        }

        for (msr_index, value) in snapshot.msrs.iter() {
            if self.get_msr(*msr_index) == Ok(*value) {
                continue; // no need to set MSR since the values are the same.
            }
            if let Err(e) = self.set_msr(*msr_index, *value) {
                if msr_allowlist.contains(msr_index) {
                    warn!(
                        "Failed to set MSR. MSR might not be supported in this kernel. Err: {}",
                        e
                    );
                } else {
                    return Err(e).context(
                        "Failed to set MSR. MSR might not be supported by the CPU or by the kernel,
                         and was not allow-listed.",
                    );
                }
            };
        }
        self.set_xsave(&snapshot.xsave)?;
        self.set_interrupt_state(snapshot.hypervisor_data.clone())?;
        self.restore_timekeeping(host_tsc_reference_moment, snapshot.tsc_offset)?;
        Ok(())
    }
}

/// x86 specific vCPU snapshot.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct VcpuSnapshot {
    pub vcpu_id: usize,
    regs: Regs,
    sregs: Sregs,
    debug_regs: DebugRegs,
    xcrs: BTreeMap<u32, u64>,
    msrs: BTreeMap<u32, u64>,
    xsave: Xsave,
    hypervisor_data: serde_json::Value,
    tsc_offset: u64,
}

impl_downcast!(VcpuX86_64);

// TSC MSR
pub const MSR_IA32_TSC: u32 = 0x00000010;

/// Gets host cpu max physical address bits.
#[cfg(any(unix, feature = "haxm", feature = "whpx"))]
pub(crate) fn host_phys_addr_bits() -> u8 {
    // SAFETY: trivially safe
    let highest_ext_function = unsafe { __cpuid(0x80000000) };
    if highest_ext_function.eax >= 0x80000008 {
        // SAFETY: trivially safe
        let addr_size = unsafe { __cpuid(0x80000008) };
        // Low 8 bits of 0x80000008 leaf: host physical address size in bits.
        addr_size.eax as u8
    } else {
        36
    }
}
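
// For example, if `__cpuid(0x80000008)` returned eax == 0x0000_3030 on some host, the low byte
// (0x30) would report 48 physical address bits. The value here is purely illustrative; real
// hosts vary.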

/// Initial state for x86_64 VCPUs.
#[derive(Clone, Default)]
pub struct VcpuInitX86_64 {
    /// General-purpose registers.
    pub regs: Regs,

    /// Special registers.
    pub sregs: Sregs,

    /// Floating-point registers.
    pub fpu: Fpu,

    /// Machine-specific registers.
    pub msrs: BTreeMap<u32, u64>,
}

/// Holds the CPU feature configurations that are needed to set up a vCPU.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CpuConfigX86_64 {
    /// Whether to force using a calibrated TSC leaf (0x15).
    pub force_calibrated_tsc_leaf: bool,

    /// Whether to enable host CPU topology.
    pub host_cpu_topology: bool,

    /// Whether to expose the HWP feature to the guest.
    pub enable_hwp: bool,

    /// Whether to disable SMT (Simultaneous Multithreading).
    pub no_smt: bool,

    /// Whether to enable the ITMT scheduler.
    pub itmt: bool,

    /// The hybrid CPU type to set, if any.
    pub hybrid_type: Option<CpuHybridType>,
}

impl CpuConfigX86_64 {
    pub fn new(
        force_calibrated_tsc_leaf: bool,
        host_cpu_topology: bool,
        enable_hwp: bool,
        no_smt: bool,
        itmt: bool,
        hybrid_type: Option<CpuHybridType>,
    ) -> Self {
        CpuConfigX86_64 {
            force_calibrated_tsc_leaf,
            host_cpu_topology,
            enable_hwp,
            no_smt,
            itmt,
            hybrid_type,
        }
    }
}

/// A CpuId Entry contains supported feature information for the given processor.
/// This can be modified by the hypervisor to pass additional information to the guest kernel
/// about the hypervisor or vm. Information is returned in the eax, ebx, ecx and edx registers
/// by the cpu for a given function and index/subfunction (passed into the cpu via the eax and ecx
/// register respectively).
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CpuIdEntry {
    pub function: u32,
    pub index: u32,
    // flags is needed for KVM.  We store it on CpuIdEntry to preserve the flags across
    // get_supported_cpuids() -> kvm_cpuid2 -> CpuId -> kvm_cpuid2 -> set_cpuid().
    pub flags: u32,
    pub cpuid: CpuidResult,
}
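
// Illustrative sketch of an entry as described above (the values are hypothetical, not taken
// from any particular host): leaf 0 reports the maximum standard leaf in eax and the vendor
// string "GenuineIntel" split across ebx/edx/ecx.
//
//     CpuIdEntry {
//         function: 0, // leaf, passed to the CPU in eax
//         index: 0,    // subleaf, passed in ecx
//         flags: 0,
//         cpuid: CpuidResult {
//             eax: 0xd,
//             ebx: 0x756e_6547, // "Genu"
//             ecx: 0x6c65_746e, // "ntel"
//             edx: 0x4965_6e69, // "ineI"
//         },
//     }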

/// A container for the list of cpu id entries for the hypervisor and underlying cpu.
pub struct CpuId {
    pub cpu_id_entries: Vec<CpuIdEntry>,
}

impl CpuId {
    /// Constructs a new CpuId, with space allocated for `initial_capacity` CpuIdEntries.
    pub fn new(initial_capacity: usize) -> Self {
        CpuId {
            cpu_id_entries: Vec::with_capacity(initial_capacity),
        }
    }
}

#[bitfield]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DestinationMode {
    Physical = 0,
    Logical = 1,
}

#[bitfield]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TriggerMode {
    Edge = 0,
    Level = 1,
}

#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeliveryMode {
    Fixed = 0b000,
    Lowest = 0b001,
    SMI = 0b010,        // System management interrupt
    RemoteRead = 0b011, // This is no longer supported by intel.
    NMI = 0b100,        // Non maskable interrupt
    Init = 0b101,
    Startup = 0b110,
    External = 0b111,
}

// These MSI structures are for Intel's implementation of MSI.  The PCI spec defines most of MSI,
// but the Intel spec defines the format of messages for raising interrupts.  The PCI spec defines
// three u32s -- the address, address_high, and data -- but Intel only makes use of the address and
// data.  The Intel portion of the specification is in Volume 3 section 10.11.
#[bitfield]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct MsiAddressMessage {
    pub reserved: BitField2,
    #[bits = 1]
    pub destination_mode: DestinationMode,
    pub redirection_hint: BitField1,
    pub reserved_2: BitField8,
    pub destination_id: BitField8,
    // According to Intel's implementation of MSI, these bits must always be 0xfee.
    pub always_0xfee: BitField12,
}

#[bitfield]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct MsiDataMessage {
    pub vector: BitField8,
    #[bits = 3]
    pub delivery_mode: DeliveryMode,
    pub reserved: BitField3,
    #[bits = 1]
    pub level: Level,
    #[bits = 1]
    pub trigger: TriggerMode,
    pub reserved2: BitField16,
}
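
// Illustrative encoding per the Intel layout referenced above (SDM Vol. 3, section 10.11), with
// hypothetical values, and assuming the bitfields above are allocated starting from the least
// significant bit: an MSI that raises vector 0x31 as a fixed, edge-triggered, physically
// addressed interrupt on the CPU with APIC ID 1 would be programmed as
//
//     address = 0xfee0_1000  // 0xfee in bits 31:20, destination_id = 1 in bits 19:12
//     data    = 0x0000_0031  // vector 0x31, delivery_mode Fixed (0), trigger Edge (0)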

#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeliveryStatus {
    Idle = 0,
    Pending = 1,
}

/// The level of a level-triggered interrupt: asserted or deasserted.
#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Level {
    Deassert = 0,
    Assert = 1,
}

/// Represents an IOAPIC redirection table entry.
#[bitfield]
#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IoapicRedirectionTableEntry {
    vector: BitField8,
    #[bits = 3]
    delivery_mode: DeliveryMode,
    #[bits = 1]
    dest_mode: DestinationMode,
    #[bits = 1]
    delivery_status: DeliveryStatus,
    polarity: BitField1,
    remote_irr: bool,
    #[bits = 1]
    trigger_mode: TriggerMode,
    interrupt_mask: bool, // true iff interrupts are masked.
    reserved: BitField39,
    dest_id: BitField8,
}

/// Number of pins on the standard KVM/IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;

/// Represents the state of the IOAPIC.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct IoapicState {
    /// base_address is the memory base address for this IOAPIC. It cannot be changed.
    pub base_address: u64,
    /// ioregsel register. Used for selecting which entry of the redirect table to read/write.
    pub ioregsel: u8,
    /// ioapicid register. Bits 24 - 27 contain the APIC ID for this device.
    pub ioapicid: u32,
    /// current_interrupt_level_bitmap represents a bitmap of the state of all of the irq lines
    pub current_interrupt_level_bitmap: u32,
    /// redirect_table contains the irq settings for each irq line
    #[serde(
        serialize_with = "serialize_arr",
        deserialize_with = "deserialize_seq_to_arr"
    )]
    pub redirect_table: [IoapicRedirectionTableEntry; NUM_IOAPIC_PINS],
}

impl Default for IoapicState {
    fn default() -> IoapicState {
        // SAFETY: trivially safe
        unsafe { std::mem::zeroed() }
    }
}

#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PicSelect {
    Primary = 0,
    Secondary = 1,
}

#[repr(C)]
#[derive(enumn::N, Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum PicInitState {
    #[default]
    Icw1 = 0,
    Icw2 = 1,
    Icw3 = 2,
    Icw4 = 3,
}

/// Convenience implementation for converting from a u8
impl From<u8> for PicInitState {
    fn from(item: u8) -> Self {
        PicInitState::n(item).unwrap_or_else(|| {
            error!("Invalid PicInitState {}, setting to 0", item);
            PicInitState::Icw1
        })
    }
}

/// Represents the state of the PIC.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PicState {
    /// Edge detection.
    pub last_irr: u8,
    /// Interrupt Request Register.
    pub irr: u8,
    /// Interrupt Mask Register.
    pub imr: u8,
    /// Interrupt Service Register.
    pub isr: u8,
    /// Highest priority, for priority rotation.
    pub priority_add: u8,
    pub irq_base: u8,
    pub read_reg_select: bool,
    pub poll: bool,
    pub special_mask: bool,
    pub init_state: PicInitState,
    pub auto_eoi: bool,
    pub rotate_on_auto_eoi: bool,
    pub special_fully_nested_mode: bool,
    /// PIC takes either 3 or 4 bytes of initialization command word during
    /// initialization. use_4_byte_icw is true if 4 bytes of ICW are needed.
    pub use_4_byte_icw: bool,
    /// "Edge/Level Control Registers", for edge trigger selection.
    /// When a particular bit is set, the corresponding IRQ is in level-triggered mode. Otherwise
    /// it is in edge-triggered mode.
    pub elcr: u8,
    pub elcr_mask: u8,
}

/// The LapicState represents the state of an x86 CPU's Local APIC.
/// The Local APIC consists of 64 128-bit registers, but only the first 32-bits of each register
/// can be used, so this structure only stores the first 32-bits of each register.
#[repr(C)]
#[derive(Clone, Copy, Serialize, Deserialize)]
pub struct LapicState {
    #[serde(
        serialize_with = "serialize_arr",
        deserialize_with = "deserialize_seq_to_arr"
    )]
    pub regs: [LapicRegister; 64],
}

pub type LapicRegister = u32;
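
// Illustrative indexing, based on the layout described above (an assumption, not an API
// guarantee): the 64 registers mirror the APIC's 16-byte-strided MMIO layout, so the register
// at MMIO offset `off` lands at `regs[off >> 4]`; e.g. the Spurious Interrupt Vector Register
// at offset 0xf0 would be `regs[0x0f]`.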

// rust arrays longer than 32 need custom implementations of Debug
impl std::fmt::Debug for LapicState {
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.regs[..].fmt(formatter)
    }
}

// rust arrays longer than 32 need custom implementations of PartialEq
impl PartialEq for LapicState {
    fn eq(&self, other: &LapicState) -> bool {
        self.regs[..] == other.regs[..]
    }
}

// Lapic equality is reflexive, so we impl Eq
impl Eq for LapicState {}

/// The PitState represents the state of the PIT (aka the Programmable Interval Timer).
/// The state is simply the state of its three channels.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PitState {
    pub channels: [PitChannelState; 3],
    /// Hypervisor-specific flags for setting the pit state.
    pub flags: u32,
}

/// The PitRWMode enum represents the access mode of a PIT channel.
/// Reads and writes to the Pit happen over Port-mapped I/O, which happens one byte at a time,
/// but the count values and latch values are two bytes. So the access mode controls which of the
/// two bytes will be read when.
#[repr(C)]
#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PitRWMode {
    /// None mode means that no access mode has been set.
    None = 0,
    /// Least mode means all reads/writes will read/write the least significant byte.
    Least = 1,
    /// Most mode means all reads/writes will read/write the most significant byte.
    Most = 2,
    /// Both mode means first the least significant byte will be read/written, then the
    /// next read/write will read/write the most significant byte.
    Both = 3,
}

/// Convenience implementation for converting from a u8
impl From<u8> for PitRWMode {
    fn from(item: u8) -> Self {
        PitRWMode::n(item).unwrap_or_else(|| {
            error!("Invalid PitRWMode value {}, setting to 0", item);
            PitRWMode::None
        })
    }
}

/// The PitRWState enum represents the state of reading to or writing from a channel.
/// This is related to the PitRWMode; it mainly gives more detail about the state of the channel
/// with respect to PitRWMode::Both.
#[repr(C)]
#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PitRWState {
    /// None mode means that no access mode has been set.
    None = 0,
    /// LSB means that the channel is in PitRWMode::Least access mode.
    LSB = 1,
    /// MSB means that the channel is in PitRWMode::Most access mode.
    MSB = 2,
    /// Word0 means that the channel is in PitRWMode::Both mode, and the least significant byte
    /// has not been read/written yet.
    Word0 = 3,
    /// Word1 means that the channel is in PitRWMode::Both mode and the least significant byte
    /// has already been read/written, and the next byte to be read/written will be the most
    /// significant byte.
    Word1 = 4,
}

/// Convenience implementation for converting from a u8
impl From<u8> for PitRWState {
    fn from(item: u8) -> Self {
        PitRWState::n(item).unwrap_or_else(|| {
            error!("Invalid PitRWState value {}, setting to 0", item);
            PitRWState::None
        })
    }
}

/// The PitChannelState represents the state of one of the PIT's three counters.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PitChannelState {
    /// The starting value for the counter.
    pub count: u32,
    /// Stores the channel count from the last time the count was latched.
    pub latched_count: u16,
    /// Indicates the PitRWState state of reading the latch value.
    pub count_latched: PitRWState,
    /// Indicates whether ReadBack status has been latched.
    pub status_latched: bool,
    /// Stores the channel status from the last time the status was latched. The status contains
    /// information about the access mode of this channel, but changing those bits in the status
    /// will not change the behavior of the pit.
    pub status: u8,
    /// Indicates the PitRWState state of reading the counter.
    pub read_state: PitRWState,
    /// Indicates the PitRWState state of writing the counter.
    pub write_state: PitRWState,
    /// Stores the value with which the counter was initialized. Counters are 16-
    /// bit values with an effective range of 1-65536 (65536 represented by 0).
    pub reload_value: u16,
    /// The command access mode of this channel.
    pub rw_mode: PitRWMode,
    /// The operation mode of this channel.
    pub mode: u8,
    /// Whether or not we are in bcd mode. Not supported by KVM or crosvm's PIT implementation.
    pub bcd: bool,
    /// Value of the gate input pin. This only applies to channel 2.
    pub gate: bool,
    /// Nanosecond timestamp of when the count value was loaded.
    pub count_load_time: u64,
}

// Convenience constructors for IrqRoutes
impl IrqRoute {
    pub fn ioapic_irq_route(irq_num: u32) -> IrqRoute {
        IrqRoute {
            gsi: irq_num,
            source: IrqSource::Irqchip {
                chip: IrqSourceChip::Ioapic,
                pin: irq_num,
            },
        }
    }

    pub fn pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute {
        IrqRoute {
            gsi: irq_num,
            source: IrqSource::Irqchip {
                chip: id,
                pin: irq_num % 8,
            },
        }
    }
}
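
// Example of the pin arithmetic above (hypothetical GSI number): a PIC route for GSI 10 targets
// pin 10 % 8 == 2, which by convention is an input of the secondary (slave) PIC, while an IOAPIC
// route for the same GSI simply uses pin 10.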

/// State of a VCPU's general purpose registers.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Regs {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    pub rip: u64,
    pub rflags: u64,
}

impl Default for Regs {
    fn default() -> Self {
        Regs {
            rax: 0,
            rbx: 0,
            rcx: 0,
            rdx: 0,
            rsi: 0,
            rdi: 0,
            rsp: 0,
            rbp: 0,
            r8: 0,
            r9: 0,
            r10: 0,
            r11: 0,
            r12: 0,
            r13: 0,
            r14: 0,
            r15: 0,
            rip: 0xfff0, // Reset vector.
            rflags: 0x2, // Bit 1 (0x2) is always 1.
        }
    }
}

/// State of a memory segment.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct Segment {
    pub base: u64,
    pub limit: u32,
    pub selector: u16,
    pub type_: u8,
    pub present: u8,
    pub dpl: u8,
    pub db: u8,
    pub s: u8,
    pub l: u8,
    pub g: u8,
    pub avl: u8,
}

/// State of a global descriptor table or interrupt descriptor table.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct DescriptorTable {
    pub base: u64,
    pub limit: u16,
}

/// State of a VCPU's special registers.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Sregs {
    pub cs: Segment,
    pub ds: Segment,
    pub es: Segment,
    pub fs: Segment,
    pub gs: Segment,
    pub ss: Segment,
    pub tr: Segment,
    pub ldt: Segment,
    pub gdt: DescriptorTable,
    pub idt: DescriptorTable,
    pub cr0: u64,
    pub cr2: u64,
    pub cr3: u64,
    pub cr4: u64,
    pub cr8: u64,
    pub efer: u64,
}

impl Default for Sregs {
    fn default() -> Self {
        // Intel SDM Vol. 3A, 3.4.5.1 ("Code- and Data-Segment Descriptor Types")
        const SEG_TYPE_DATA: u8 = 0b0000;
        const SEG_TYPE_DATA_WRITABLE: u8 = 0b0010;

        const SEG_TYPE_CODE: u8 = 0b1000;
        const SEG_TYPE_CODE_READABLE: u8 = 0b0010;

        const SEG_TYPE_ACCESSED: u8 = 0b0001;

        // Intel SDM Vol. 3A, 3.4.5 ("Segment Descriptors")
        const SEG_S_SYSTEM: u8 = 0; // System segment.
        const SEG_S_CODE_OR_DATA: u8 = 1; // Data/code segment.

        // 16-bit real-mode code segment (reset vector).
        let code_seg = Segment {
            base: 0xffff0000,
            limit: 0xffff,
            selector: 0xf000,
            type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11
            present: 1,
            s: SEG_S_CODE_OR_DATA,
            ..Default::default()
        };

        // 16-bit real-mode data segment.
        let data_seg = Segment {
            base: 0,
            limit: 0xffff,
            selector: 0,
            type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE | SEG_TYPE_ACCESSED, // 3
            present: 1,
            s: SEG_S_CODE_OR_DATA,
            ..Default::default()
        };

        // 16-bit TSS segment.
        let task_seg = Segment {
            base: 0,
            limit: 0xffff,
            selector: 0,
            type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11
            present: 1,
            s: SEG_S_SYSTEM,
            ..Default::default()
        };

        // Local descriptor table.
        let ldt = Segment {
            base: 0,
            limit: 0xffff,
            selector: 0,
            type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE, // 2
            present: 1,
            s: SEG_S_SYSTEM,
            ..Default::default()
        };

        // Global descriptor table.
        let gdt = DescriptorTable {
            base: 0,
            limit: 0xffff,
        };

        // Interrupt descriptor table.
        let idt = DescriptorTable {
            base: 0,
            limit: 0xffff,
        };

        let cr0 = (1 << 4) // CR0.ET (reserved, always 1)
                | (1 << 30); // CR0.CD (cache disable)

        Sregs {
            cs: code_seg,
            ds: data_seg,
            es: data_seg,
            fs: data_seg,
            gs: data_seg,
            ss: data_seg,
            tr: task_seg,
            ldt,
            gdt,
            idt,
            cr0,
            cr2: 0,
            cr3: 0,
            cr4: 0,
            cr8: 0,
            efer: 0,
        }
    }
}

/// State of a VCPU's floating point unit.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Fpu {
    pub fpr: [[u8; 16usize]; 8usize],
    pub fcw: u16,
    pub fsw: u16,
    pub ftwx: u8,
    pub last_opcode: u16,
    pub last_ip: u64,
    pub last_dp: u64,
    pub xmm: [[u8; 16usize]; 16usize],
    pub mxcsr: u32,
}

impl Default for Fpu {
    fn default() -> Self {
        Fpu {
            fpr: Default::default(),
            fcw: 0x37f, // Intel SDM Vol. 1, 13.6
            fsw: 0,
            ftwx: 0,
            last_opcode: 0,
            last_ip: 0,
            last_dp: 0,
            xmm: Default::default(),
            mxcsr: 0x1f80, // Intel SDM Vol. 1, 11.6.4
        }
    }
}

/// State of a VCPU's debug registers.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct DebugRegs {
    pub db: [u64; 4usize],
    pub dr6: u64,
    pub dr7: u64,
}

/// The hybrid type for intel hybrid CPU.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum CpuHybridType {
    /// Intel Atom.
    Atom,
    /// Intel Core.
    Core,
}

/// State of the VCPU's x87 FPU, MMX, XMM, YMM registers.
/// May contain more state depending on enabled extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Xsave {
    data: Vec<u32>,

    // Actual length in bytes. May be smaller than data if a non-u32 multiple of bytes is
    // requested.
    len: usize,
}

impl Xsave {
    /// Create a new buffer to store Xsave data.
    ///
    /// # Arguments
    /// * `len` - size in bytes.
    pub fn new(len: usize) -> Self {
        Xsave {
            data: vec![0; (len + 3) / 4],
            len,
        }
    }
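
    // For example, `Xsave::new(10)` rounds the backing storage up to three u32 words (12 bytes),
    // while `len()` still reports the requested 10 bytes.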

    pub fn as_ptr(&self) -> *const c_void {
        self.data.as_ptr() as *const c_void
    }

    pub fn as_mut_ptr(&mut self) -> *mut c_void {
        self.data.as_mut_ptr() as *mut c_void
    }

    /// Length in bytes of the XSAVE data.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns true if the length of the XSAVE data is zero.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}