1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::arch::x86_64::CpuidResult;
6 #[cfg(any(unix, feature = "haxm", feature = "whpx"))]
7 use std::arch::x86_64::__cpuid;
8 use std::arch::x86_64::_rdtsc;
9 use std::collections::BTreeMap;
10 use std::collections::HashSet;
11
12 use anyhow::Context;
13 use base::custom_serde::deserialize_seq_to_arr;
14 use base::custom_serde::serialize_arr;
15 use base::error;
16 use base::warn;
17 use base::Result;
18 use bit_field::*;
19 use downcast_rs::impl_downcast;
20 use libc::c_void;
21 use serde::Deserialize;
22 use serde::Serialize;
23 use vm_memory::GuestAddress;
24
25 use crate::Hypervisor;
26 use crate::IrqRoute;
27 use crate::IrqSource;
28 use crate::IrqSourceChip;
29 use crate::Vcpu;
30 use crate::Vm;
31
// AMD Family 15h performance event select (CTLn) / counter (CTRn) MSR pairs.
// These, together with MSR_IA32_PERF_CAPABILITIES, are allow-listed in
// `VcpuX86_64::restore` because older host kernels may not support setting
// them even though `get_all_msrs` reports them.
const MSR_F15H_PERF_CTL0: u32 = 0xc0010200;
const MSR_F15H_PERF_CTL1: u32 = 0xc0010202;
const MSR_F15H_PERF_CTL2: u32 = 0xc0010204;
const MSR_F15H_PERF_CTL3: u32 = 0xc0010206;
const MSR_F15H_PERF_CTL4: u32 = 0xc0010208;
const MSR_F15H_PERF_CTL5: u32 = 0xc001020a;
const MSR_F15H_PERF_CTR0: u32 = 0xc0010201;
const MSR_F15H_PERF_CTR1: u32 = 0xc0010203;
const MSR_F15H_PERF_CTR2: u32 = 0xc0010205;
const MSR_F15H_PERF_CTR3: u32 = 0xc0010207;
const MSR_F15H_PERF_CTR4: u32 = 0xc0010209;
const MSR_F15H_PERF_CTR5: u32 = 0xc001020b;
// Performance monitoring capabilities MSR.
const MSR_IA32_PERF_CAPABILITIES: u32 = 0x00000345;
45
/// A trait for managing cpuids for an x86_64 hypervisor and for checking its capabilities.
pub trait HypervisorX86_64: Hypervisor {
    /// Get the system supported CPUID values.
    fn get_supported_cpuid(&self) -> Result<CpuId>;

    /// Get the system emulated CPUID values.
    fn get_emulated_cpuid(&self) -> Result<CpuId>;

    /// Gets the list of supported MSRs (model-specific register indices).
    fn get_msr_index_list(&self) -> Result<Vec<u32>>;
}
57
/// A wrapper for using a VM on x86_64 and getting/setting its state.
pub trait VmX86_64: Vm {
    /// Gets the `HypervisorX86_64` that created this VM.
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64;

    /// Create a Vcpu with the specified Vcpu ID.
    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>;

    /// Sets the address of the three-page region in the VM's address space.
    // NOTE(review): presumably the TSS region required by some hypervisors —
    // confirm against the concrete hypervisor implementations.
    fn set_tss_addr(&self, addr: GuestAddress) -> Result<()>;

    /// Sets the address of a one-page region in the VM's address space.
    // NOTE(review): the name suggests an identity-map page — confirm against
    // the concrete hypervisor implementations.
    fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>;
}
72
/// A wrapper around creating and using a VCPU on x86_64.
///
/// Besides the per-register accessors, this trait provides default
/// implementations for TSC bookkeeping (`get_tsc_offset`, `set_tsc_offset`,
/// `set_tsc_value`) and for whole-vCPU snapshot/restore.
pub trait VcpuX86_64: Vcpu {
    /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
    /// interrupts into the guest.
    fn set_interrupt_window_requested(&self, requested: bool);

    /// Checks if we can inject an interrupt into the VCPU.
    fn ready_for_interrupt(&self) -> bool;

    /// Injects interrupt vector `irq` into the VCPU.
    fn interrupt(&self, irq: u32) -> Result<()>;

    /// Injects a non-maskable interrupt into the VCPU.
    fn inject_nmi(&self) -> Result<()>;

    /// Gets the VCPU general purpose registers.
    fn get_regs(&self) -> Result<Regs>;

    /// Sets the VCPU general purpose registers.
    fn set_regs(&self, regs: &Regs) -> Result<()>;

    /// Gets the VCPU special registers.
    fn get_sregs(&self) -> Result<Sregs>;

    /// Sets the VCPU special registers.
    fn set_sregs(&self, sregs: &Sregs) -> Result<()>;

    /// Gets the VCPU FPU registers.
    fn get_fpu(&self) -> Result<Fpu>;

    /// Sets the VCPU FPU registers.
    fn set_fpu(&self, fpu: &Fpu) -> Result<()>;

    /// Gets the VCPU debug registers.
    fn get_debugregs(&self) -> Result<DebugRegs>;

    /// Sets the VCPU debug registers.
    fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>;

    /// Gets the VCPU extended control registers.
    fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>>;

    /// Sets a VCPU extended control register.
    fn set_xcr(&self, xcr: u32, value: u64) -> Result<()>;

    /// Gets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.
    fn get_xsave(&self) -> Result<Xsave>;

    /// Sets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.
    fn set_xsave(&self, xsave: &Xsave) -> Result<()>;

    /// Gets interrupt state (hypervisor specific) for this VCPU that must be
    /// saved/restored for snapshotting.
    fn get_interrupt_state(&self) -> Result<serde_json::Value>;

    /// Sets interrupt state (hypervisor specific) for this VCPU. Only used for
    /// snapshotting.
    fn set_interrupt_state(&self, data: serde_json::Value) -> Result<()>;

    /// Gets a single model-specific register's value.
    fn get_msr(&self, msr_index: u32) -> Result<u64>;

    /// Gets the model-specific registers. Returns all the MSRs for the VCPU.
    fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>>;

    /// Sets a single model-specific register's value.
    fn set_msr(&self, msr_index: u32, value: u64) -> Result<()>;

    /// Sets up the data returned by the CPUID instruction.
    fn set_cpuid(&self, cpuid: &CpuId) -> Result<()>;

    /// Gets the system emulated hyper-v CPUID values.
    fn get_hyperv_cpuid(&self) -> Result<CpuId>;

    /// Sets up debug registers and configure vcpu for handling guest debug events.
    fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>;

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
    /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
    /// will then set the appropriate registers on the vcpu.
    fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()>;

    /// Gets the guest->host TSC offset.
    ///
    /// The default implementation uses [`VcpuX86_64::get_msr()`] to read the guest TSC.
    fn get_tsc_offset(&self) -> Result<u64> {
        // Sample the host TSC both before and after reading the guest TSC so
        // the averaged host value brackets the (potentially slow) hypervisor
        // call.
        // SAFETY:
        // Safe because _rdtsc takes no arguments
        let host_before_tsc = unsafe { _rdtsc() };

        // get guest TSC value from our hypervisor
        let guest_tsc = self.get_msr(crate::MSR_IA32_TSC)?;

        // SAFETY:
        // Safe because _rdtsc takes no arguments
        let host_after_tsc = unsafe { _rdtsc() };

        // Average the before and after host tsc to get the best value
        // (u128 intermediate avoids overflow of the sum).
        let host_tsc = ((host_before_tsc as u128 + host_after_tsc as u128) / 2) as u64;

        Ok(guest_tsc.wrapping_sub(host_tsc))
    }

    /// Sets the guest->host TSC offset.
    ///
    /// The default implementation uses [`VcpuX86_64::set_tsc_value()`] to set the TSC value.
    ///
    /// It sets TSC_OFFSET (VMCS / CB field) by setting the TSC MSR to the current
    /// host TSC value plus the desired offset. We rely on the fact that hypervisors
    /// determine the value of TSC_OFFSET by computing TSC_OFFSET = new_tsc_value
    /// - _rdtsc() = _rdtsc() + offset - _rdtsc() ~= offset. Note that the ~= is
    /// important: this is an approximate operation, because the two _rdtsc() calls
    /// are separated by at least a few ticks.
    ///
    /// Note: TSC_OFFSET, host TSC, guest TSC, and TSC MSR are all different
    /// concepts.
    /// * When a guest executes rdtsc, the value (guest TSC) returned is host_tsc * TSC_MULTIPLIER +
    ///   TSC_OFFSET + TSC_ADJUST.
    /// * The TSC MSR is a special MSR that when written to by the host, will cause TSC_OFFSET to be
    ///   set accordingly by the hypervisor.
    /// * When the guest *writes* to TSC MSR, it actually changes the TSC_ADJUST MSR *for the
    ///   guest*. Generally this is only happens if the guest is trying to re-zero or synchronize
    ///   TSCs.
    fn set_tsc_offset(&self, offset: u64) -> Result<()> {
        // SAFETY: _rdtsc takes no arguments.
        let host_tsc = unsafe { _rdtsc() };
        self.set_tsc_value(host_tsc.wrapping_add(offset))
    }

    /// Sets the guest TSC exactly to the provided value.
    ///
    /// The default implementation sets the guest's TSC by writing the value to the MSR directly.
    ///
    /// See [`VcpuX86_64::set_tsc_offset()`] for an explanation of how this value is actually read
    /// by the guest after being set.
    fn set_tsc_value(&self, value: u64) -> Result<()> {
        self.set_msr(crate::MSR_IA32_TSC, value)
    }

    /// Some hypervisors require special handling to restore timekeeping when
    /// a snapshot is restored. They are provided with a host TSC reference
    /// moment, guaranteed to be the same across all Vcpus, and the Vcpu's TSC
    /// offset at the moment it was snapshotted.
    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()>;

    /// Snapshot vCPU state into a serializable [`VcpuSnapshot`].
    fn snapshot(&self) -> anyhow::Result<VcpuSnapshot> {
        Ok(VcpuSnapshot {
            vcpu_id: self.id(),
            regs: self.get_regs()?,
            sregs: self.get_sregs()?,
            debug_regs: self.get_debugregs()?,
            xcrs: self.get_xcrs()?,
            msrs: self.get_all_msrs()?,
            xsave: self.get_xsave()?,
            hypervisor_data: self.get_interrupt_state()?,
            tsc_offset: self.get_tsc_offset()?,
        })
    }

    /// Restores vCPU state from a [`VcpuSnapshot`]. `host_tsc_reference_moment`
    /// must be the same value for every vCPU of the VM (see
    /// [`VcpuX86_64::restore_timekeeping`]).
    ///
    /// Panics if the snapshot's `vcpu_id` does not match this vCPU's id.
    fn restore(
        &mut self,
        snapshot: &VcpuSnapshot,
        host_tsc_reference_moment: u64,
    ) -> anyhow::Result<()> {
        // List of MSRs that may fail to restore due to lack of support in the host kernel.
        // Some hosts may be running older kernels which do not support all MSRs, but
        // get_all_msrs will still fetch the MSRs supported by the CPU. Trying to set those MSRs
        // will result in failures, so they will throw a warning instead.
        let msr_allowlist = HashSet::from([
            MSR_F15H_PERF_CTL0,
            MSR_F15H_PERF_CTL1,
            MSR_F15H_PERF_CTL2,
            MSR_F15H_PERF_CTL3,
            MSR_F15H_PERF_CTL4,
            MSR_F15H_PERF_CTL5,
            MSR_F15H_PERF_CTR0,
            MSR_F15H_PERF_CTR1,
            MSR_F15H_PERF_CTR2,
            MSR_F15H_PERF_CTR3,
            MSR_F15H_PERF_CTR4,
            MSR_F15H_PERF_CTR5,
            MSR_IA32_PERF_CAPABILITIES,
        ]);
        assert_eq!(snapshot.vcpu_id, self.id());
        self.set_regs(&snapshot.regs)?;
        self.set_sregs(&snapshot.sregs)?;
        self.set_debugregs(&snapshot.debug_regs)?;
        for (xcr_index, value) in &snapshot.xcrs {
            self.set_xcr(*xcr_index, *value)?;
        }

        for (msr_index, value) in snapshot.msrs.iter() {
            if self.get_msr(*msr_index) == Ok(*value) {
                continue; // no need to set MSR since the values are the same.
            }
            if let Err(e) = self.set_msr(*msr_index, *value) {
                if msr_allowlist.contains(msr_index) {
                    warn!(
                        "Failed to set MSR. MSR might not be supported in this kernel. Err: {}",
                        e
                    );
                } else {
                    return Err(e).context(
                        "Failed to set MSR. MSR might not be supported by the CPU or by the kernel,
                         and was not allow-listed.",
                    );
                }
            };
        }
        self.set_xsave(&snapshot.xsave)?;
        self.set_interrupt_state(snapshot.hypervisor_data.clone())?;
        self.restore_timekeeping(host_tsc_reference_moment, snapshot.tsc_offset)?;
        Ok(())
    }
}
289
/// x86 specific vCPU snapshot.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct VcpuSnapshot {
    /// ID of the vCPU this snapshot was taken from; checked on restore.
    pub vcpu_id: usize,
    // General purpose registers.
    regs: Regs,
    // Special registers.
    sregs: Sregs,
    // Debug registers.
    debug_regs: DebugRegs,
    // Extended control registers, keyed by XCR index.
    xcrs: BTreeMap<u32, u64>,
    // Model-specific registers, keyed by MSR index.
    msrs: BTreeMap<u32, u64>,
    // XSAVE area (x87 FPU/MMX/XMM/YMM state).
    xsave: Xsave,
    // Opaque hypervisor-specific interrupt state (see `get_interrupt_state`).
    hypervisor_data: serde_json::Value,
    // Guest->host TSC offset at snapshot time.
    tsc_offset: u64,
}
303
// Allows downcasting `&dyn VcpuX86_64` to concrete hypervisor vcpu types.
impl_downcast!(VcpuX86_64);

// TSC MSR
pub const MSR_IA32_TSC: u32 = 0x00000010;
308
/// Gets host cpu max physical address bits.
#[cfg(any(unix, feature = "haxm", feature = "whpx"))]
pub(crate) fn host_phys_addr_bits() -> u8 {
    // Leaf reporting the highest supported extended CPUID function.
    const EXT_FUNCTION_INFO: u32 = 0x80000000;
    // Leaf whose EAX bits 7:0 report the physical address width.
    const ADDR_SIZE_LEAF: u32 = 0x80000008;

    // SAFETY: trivially safe
    let max_ext_leaf = unsafe { __cpuid(EXT_FUNCTION_INFO) }.eax;
    if max_ext_leaf < ADDR_SIZE_LEAF {
        // CPU does not report an address size; fall back to 36 bits.
        return 36;
    }

    // SAFETY: trivially safe
    let addr_size = unsafe { __cpuid(ADDR_SIZE_LEAF) };
    // Low 8 bits of the 0x80000008 leaf: host physical address size in bits.
    (addr_size.eax & 0xff) as u8
}
323
/// Initial state for x86_64 VCPUs.
#[derive(Clone, Default)]
pub struct VcpuInitX86_64 {
    /// General-purpose registers.
    pub regs: Regs,

    /// Special registers.
    pub sregs: Sregs,

    /// Floating-point registers.
    pub fpu: Fpu,

    /// Machine-specific registers, keyed by MSR index.
    pub msrs: BTreeMap<u32, u64>,
}
339
/// Hold the CPU feature configurations that are needed to setup a vCPU.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CpuConfigX86_64 {
    /// Whether to force using a calibrated TSC leaf (0x15).
    pub force_calibrated_tsc_leaf: bool,

    /// Whether to enable host cpu topology.
    pub host_cpu_topology: bool,

    /// Whether to expose the HWP feature to the guest.
    pub enable_hwp: bool,

    /// Whether to disable SMT (Simultaneous Multithreading).
    pub no_smt: bool,

    /// Whether to enable the ITMT scheduler.
    pub itmt: bool,

    /// Hybrid CPU type to set, if any.
    pub hybrid_type: Option<CpuHybridType>,
}
361
362 impl CpuConfigX86_64 {
new( force_calibrated_tsc_leaf: bool, host_cpu_topology: bool, enable_hwp: bool, no_smt: bool, itmt: bool, hybrid_type: Option<CpuHybridType>, ) -> Self363 pub fn new(
364 force_calibrated_tsc_leaf: bool,
365 host_cpu_topology: bool,
366 enable_hwp: bool,
367 no_smt: bool,
368 itmt: bool,
369 hybrid_type: Option<CpuHybridType>,
370 ) -> Self {
371 CpuConfigX86_64 {
372 force_calibrated_tsc_leaf,
373 host_cpu_topology,
374 enable_hwp,
375 no_smt,
376 itmt,
377 hybrid_type,
378 }
379 }
380 }
381
/// A CpuId Entry contains supported feature information for the given processor.
/// This can be modified by the hypervisor to pass additional information to the guest kernel
/// about the hypervisor or vm. Information is returned in the eax, ebx, ecx and edx registers
/// by the cpu for a given function and index/subfunction (passed into the cpu via the eax and ecx
/// register respectively).
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CpuIdEntry {
    /// CPUID function (the value loaded into eax).
    pub function: u32,
    /// CPUID index/subfunction (the value loaded into ecx).
    pub index: u32,
    // flags is needed for KVM. We store it on CpuIdEntry to preserve the flags across
    // get_supported_cpuids() -> kvm_cpuid2 -> CpuId -> kvm_cpuid2 -> set_cpuid().
    pub flags: u32,
    /// Register values (eax/ebx/ecx/edx) returned for this function/index.
    pub cpuid: CpuidResult,
}
397
/// A container for the list of cpu id entries for the hypervisor and underlying cpu.
pub struct CpuId {
    /// The CPUID entries, one per (function, index) pair.
    pub cpu_id_entries: Vec<CpuIdEntry>,
}
402
403 impl CpuId {
404 /// Constructs a new CpuId, with space allocated for `initial_capacity` CpuIdEntries.
new(initial_capacity: usize) -> Self405 pub fn new(initial_capacity: usize) -> Self {
406 CpuId {
407 cpu_id_entries: Vec::with_capacity(initial_capacity),
408 }
409 }
410 }
411
/// Interrupt destination mode: physical APIC ID or logical destination.
#[bitfield]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DestinationMode {
    Physical = 0,
    Logical = 1,
}

/// Interrupt trigger mode: edge- or level-triggered.
#[bitfield]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TriggerMode {
    Edge = 0,
    Level = 1,
}

/// APIC interrupt delivery mode (3-bit field).
#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeliveryMode {
    Fixed = 0b000,
    Lowest = 0b001,
    SMI = 0b010,        // System management interrupt
    RemoteRead = 0b011, // This is no longer supported by intel.
    NMI = 0b100,        // Non maskable interrupt
    Init = 0b101,
    Startup = 0b110,
    External = 0b111,
}
438
// These MSI structures are for Intel's implementation of MSI. The PCI spec defines most of MSI,
// but the Intel spec defines the format of messages for raising interrupts. The PCI spec defines
// three u32s -- the address, address_high, and data -- but Intel only makes use of the address and
// data. The Intel portion of the specification is in Volume 3 section 10.11.

/// Bit layout of the MSI address word (low-order bits first).
#[bitfield]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct MsiAddressMessage {
    pub reserved: BitField2,
    #[bits = 1]
    pub destination_mode: DestinationMode,
    pub redirection_hint: BitField1,
    pub reserved_2: BitField8,
    pub destination_id: BitField8,
    // According to Intel's implementation of MSI, these bits must always be 0xfee.
    pub always_0xfee: BitField12,
}

/// Bit layout of the MSI data word (low-order bits first).
#[bitfield]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct MsiDataMessage {
    pub vector: BitField8,
    #[bits = 3]
    pub delivery_mode: DeliveryMode,
    pub reserved: BitField3,
    #[bits = 1]
    pub level: Level,
    #[bits = 1]
    pub trigger: TriggerMode,
    pub reserved2: BitField16,
}
469
/// Delivery status of an interrupt: idle or pending delivery.
#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeliveryStatus {
    Idle = 0,
    Pending = 1,
}

/// The level of a level-triggered interrupt: asserted or deasserted.
#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Level {
    Deassert = 0,
    Assert = 1,
}
484
/// Represents a IOAPIC redirection table entry.
#[bitfield]
#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IoapicRedirectionTableEntry {
    // Interrupt vector delivered to the destination.
    vector: BitField8,
    #[bits = 3]
    delivery_mode: DeliveryMode,
    #[bits = 1]
    dest_mode: DestinationMode,
    #[bits = 1]
    delivery_status: DeliveryStatus,
    // Pin polarity bit.
    polarity: BitField1,
    remote_irr: bool,
    #[bits = 1]
    trigger_mode: TriggerMode,
    interrupt_mask: bool, // true iff interrupts are masked.
    reserved: BitField39,
    // Destination APIC ID (or logical destination, per dest_mode).
    dest_id: BitField8,
}

/// Number of pins on the standard KVM/IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;
507
/// Represents the state of the IOAPIC.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct IoapicState {
    /// base_address is the memory base address for this IOAPIC. It cannot be changed.
    pub base_address: u64,
    /// ioregsel register. Used for selecting which entry of the redirect table to read/write.
    pub ioregsel: u8,
    /// ioapicid register. Bits 24 - 27 contain the APIC ID for this device.
    pub ioapicid: u32,
    /// current_interrupt_level_bitmap represents a bitmap of the state of all of the irq lines
    pub current_interrupt_level_bitmap: u32,
    /// redirect_table contains the irq settings for each irq line
    // Arrays longer than 32 elements need a custom serde implementation.
    #[serde(
        serialize_with = "serialize_arr",
        deserialize_with = "deserialize_seq_to_arr"
    )]
    pub redirect_table: [IoapicRedirectionTableEntry; NUM_IOAPIC_PINS],
}
527
impl Default for IoapicState {
    fn default() -> IoapicState {
        // SAFETY: trivially safe — IoapicState is a #[repr(C)] struct of plain
        // integers and integer-backed bitfields, so the all-zero bit pattern
        // is a valid value.
        unsafe { std::mem::zeroed() }
    }
}
534
/// Selects one of the two cascaded PICs.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PicSelect {
    Primary = 0,
    Secondary = 1,
}

/// Which initialization command word (ICW) the PIC expects next.
#[repr(C)]
#[derive(enumn::N, Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum PicInitState {
    #[default]
    Icw1 = 0,
    Icw2 = 1,
    Icw3 = 2,
    Icw4 = 3,
}
551
552 /// Convenience implementation for converting from a u8
553 impl From<u8> for PicInitState {
from(item: u8) -> Self554 fn from(item: u8) -> Self {
555 PicInitState::n(item).unwrap_or_else(|| {
556 error!("Invalid PicInitState {}, setting to 0", item);
557 PicInitState::Icw1
558 })
559 }
560 }
561
/// Represents the state of the PIC.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PicState {
    /// Edge detection.
    pub last_irr: u8,
    /// Interrupt Request Register.
    pub irr: u8,
    /// Interrupt Mask Register.
    pub imr: u8,
    /// Interrupt Service Register.
    pub isr: u8,
    /// Highest priority, for priority rotation.
    pub priority_add: u8,
    /// Interrupt vector base for this PIC.
    pub irq_base: u8,
    /// Selects which register (IRR/ISR) is returned on a read.
    pub read_reg_select: bool,
    /// Poll mode flag.
    pub poll: bool,
    /// Special mask mode flag.
    pub special_mask: bool,
    /// Which initialization command word is expected next.
    pub init_state: PicInitState,
    /// Automatic end-of-interrupt mode.
    pub auto_eoi: bool,
    /// Rotate priorities on automatic EOI.
    pub rotate_on_auto_eoi: bool,
    /// Special fully nested mode flag.
    pub special_fully_nested_mode: bool,
    /// PIC takes either 3 or 4 bytes of initialization command word during
    /// initialization. use_4_byte_icw is true if 4 bytes of ICW are needed.
    pub use_4_byte_icw: bool,
    /// "Edge/Level Control Registers", for edge trigger selection.
    /// When a particular bit is set, the corresponding IRQ is in level-triggered mode. Otherwise
    /// it is in edge-triggered mode.
    pub elcr: u8,
    /// Mask of which ELCR bits are writable.
    pub elcr_mask: u8,
}
593
/// The LapicState represents the state of an x86 CPU's Local APIC.
/// The Local APIC consists of 64 128-bit registers, but only the first 32-bits of each register
/// can be used, so this structure only stores the first 32-bits of each register.
#[repr(C)]
#[derive(Clone, Copy, Serialize, Deserialize)]
pub struct LapicState {
    // Arrays longer than 32 elements need a custom serde implementation.
    #[serde(
        serialize_with = "serialize_arr",
        deserialize_with = "deserialize_seq_to_arr"
    )]
    pub regs: [LapicRegister; 64],
}

/// The usable (low) 32 bits of one Local APIC register.
pub type LapicRegister = u32;
608
609 // rust arrays longer than 32 need custom implementations of Debug
610 impl std::fmt::Debug for LapicState {
fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result611 fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
612 self.regs[..].fmt(formatter)
613 }
614 }
615
616 // rust arrays longer than 32 need custom implementations of PartialEq
617 impl PartialEq for LapicState {
eq(&self, other: &LapicState) -> bool618 fn eq(&self, other: &LapicState) -> bool {
619 self.regs[..] == other.regs[..]
620 }
621 }
622
623 // Lapic equality is reflexive, so we impl Eq
624 impl Eq for LapicState {}
625
/// The PitState represents the state of the PIT (aka the Programmable Interval Timer).
/// The state is simply the state of its three channels.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PitState {
    pub channels: [PitChannelState; 3],
    /// Hypervisor-specific flags for setting the pit state.
    pub flags: u32,
}
635
/// The PitRWMode enum represents the access mode of a PIT channel.
/// Reads and writes to the Pit happen over Port-mapped I/O, which happens one byte at a time,
/// but the count values and latch values are two bytes. So the access mode controls which of the
/// two bytes will be read when.
#[repr(C)]
#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PitRWMode {
    /// None mode means that no access mode has been set.
    None = 0,
    /// Least mode means all reads/writes will read/write the least significant byte.
    Least = 1,
    /// Most mode means all reads/writes will read/write the most significant byte.
    Most = 2,
    /// Both mode means first the least significant byte will be read/written, then the
    /// next read/write will read/write the most significant byte.
    Both = 3,
}
653
654 /// Convenience implementation for converting from a u8
655 impl From<u8> for PitRWMode {
from(item: u8) -> Self656 fn from(item: u8) -> Self {
657 PitRWMode::n(item).unwrap_or_else(|| {
658 error!("Invalid PitRWMode value {}, setting to 0", item);
659 PitRWMode::None
660 })
661 }
662 }
663
/// The PitRWState enum represents the state of reading to or writing from a channel.
/// This is related to the PitRWMode, it mainly gives more detail about the state of the channel
/// with respect to PitRWMode::Both.
#[repr(C)]
#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PitRWState {
    /// None mode means that no access mode has been set.
    None = 0,
    /// LSB means that the channel is in PitRWMode::Least access mode.
    LSB = 1,
    /// MSB means that the channel is in PitRWMode::Most access mode.
    MSB = 2,
    /// Word0 means that the channel is in PitRWMode::Both mode, and the least significant byte
    /// has not been read/written yet.
    Word0 = 3,
    /// Word1 means that the channel is in PitRWMode::Both mode and the least significant byte
    /// has already been read/written, and the next byte to be read/written will be the most
    /// significant byte.
    Word1 = 4,
}
684
685 /// Convenience implementation for converting from a u8
686 impl From<u8> for PitRWState {
from(item: u8) -> Self687 fn from(item: u8) -> Self {
688 PitRWState::n(item).unwrap_or_else(|| {
689 error!("Invalid PitRWState value {}, setting to 0", item);
690 PitRWState::None
691 })
692 }
693 }
694
/// The PitChannelState represents the state of one of the PIT's three counters.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PitChannelState {
    /// The starting value for the counter.
    pub count: u32,
    /// Stores the channel count from the last time the count was latched.
    pub latched_count: u16,
    /// Indicates the PitRWState state of reading the latch value.
    pub count_latched: PitRWState,
    /// Indicates whether ReadBack status has been latched.
    pub status_latched: bool,
    /// Stores the channel status from the last time the status was latched. The status contains
    /// information about the access mode of this channel, but changing those bits in the status
    /// will not change the behavior of the pit.
    pub status: u8,
    /// Indicates the PitRWState state of reading the counter.
    pub read_state: PitRWState,
    /// Indicates the PitRWState state of writing the counter.
    pub write_state: PitRWState,
    /// Stores the value with which the counter was initialized. Counters are 16-
    /// bit values with an effective range of 1-65536 (65536 represented by 0).
    pub reload_value: u16,
    /// The command access mode of this channel.
    pub rw_mode: PitRWMode,
    /// The operation mode of this channel.
    pub mode: u8,
    /// Whether or not we are in bcd mode. Not supported by KVM or crosvm's PIT implementation.
    pub bcd: bool,
    /// Value of the gate input pin. This only applies to channel 2.
    pub gate: bool,
    /// Nanosecond timestamp of when the count value was loaded.
    pub count_load_time: u64,
}
729
730 // Convenience constructors for IrqRoutes
731 impl IrqRoute {
ioapic_irq_route(irq_num: u32) -> IrqRoute732 pub fn ioapic_irq_route(irq_num: u32) -> IrqRoute {
733 IrqRoute {
734 gsi: irq_num,
735 source: IrqSource::Irqchip {
736 chip: IrqSourceChip::Ioapic,
737 pin: irq_num,
738 },
739 }
740 }
741
pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute742 pub fn pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute {
743 IrqRoute {
744 gsi: irq_num,
745 source: IrqSource::Irqchip {
746 chip: id,
747 pin: irq_num % 8,
748 },
749 }
750 }
751 }
752
/// State of a VCPU's general purpose registers.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Regs {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    /// Instruction pointer.
    pub rip: u64,
    /// Flags register.
    pub rflags: u64,
}
776
777 impl Default for Regs {
default() -> Self778 fn default() -> Self {
779 Regs {
780 rax: 0,
781 rbx: 0,
782 rcx: 0,
783 rdx: 0,
784 rsi: 0,
785 rdi: 0,
786 rsp: 0,
787 rbp: 0,
788 r8: 0,
789 r9: 0,
790 r10: 0,
791 r11: 0,
792 r12: 0,
793 r13: 0,
794 r14: 0,
795 r15: 0,
796 rip: 0xfff0, // Reset vector.
797 rflags: 0x2, // Bit 1 (0x2) is always 1.
798 }
799 }
800 }
801
/// State of a memory segment.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct Segment {
    /// Segment base address.
    pub base: u64,
    /// Segment limit.
    pub limit: u32,
    /// Segment selector.
    pub selector: u16,
    /// Descriptor type bits (code/data type field).
    pub type_: u8,
    /// Present flag (1 = segment present).
    pub present: u8,
    /// Descriptor privilege level.
    pub dpl: u8,
    /// Default operation size ("D/B") flag.
    pub db: u8,
    /// Descriptor kind: 0 = system segment, 1 = code/data segment.
    pub s: u8,
    /// 64-bit ("long mode") code segment flag.
    pub l: u8,
    /// Granularity flag.
    pub g: u8,
    /// Available-for-software bit.
    pub avl: u8,
}

/// State of a global descriptor table or interrupt descriptor table.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct DescriptorTable {
    /// Linear base address of the table.
    pub base: u64,
    /// Table limit (size in bytes minus one).
    pub limit: u16,
}
826
/// State of a VCPU's special registers.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Sregs {
    pub cs: Segment,
    pub ds: Segment,
    pub es: Segment,
    pub fs: Segment,
    pub gs: Segment,
    pub ss: Segment,
    /// Task register.
    pub tr: Segment,
    /// Local descriptor table register.
    pub ldt: Segment,
    /// Global descriptor table register.
    pub gdt: DescriptorTable,
    /// Interrupt descriptor table register.
    pub idt: DescriptorTable,
    pub cr0: u64,
    pub cr2: u64,
    pub cr3: u64,
    pub cr4: u64,
    pub cr8: u64,
    /// Extended feature enable register.
    pub efer: u64,
}
848
849 impl Default for Sregs {
default() -> Self850 fn default() -> Self {
851 // Intel SDM Vol. 3A, 3.4.5.1 ("Code- and Data-Segment Descriptor Types")
852 const SEG_TYPE_DATA: u8 = 0b0000;
853 const SEG_TYPE_DATA_WRITABLE: u8 = 0b0010;
854
855 const SEG_TYPE_CODE: u8 = 0b1000;
856 const SEG_TYPE_CODE_READABLE: u8 = 0b0010;
857
858 const SEG_TYPE_ACCESSED: u8 = 0b0001;
859
860 // Intel SDM Vol. 3A, 3.4.5 ("Segment Descriptors")
861 const SEG_S_SYSTEM: u8 = 0; // System segment.
862 const SEG_S_CODE_OR_DATA: u8 = 1; // Data/code segment.
863
864 // 16-bit real-mode code segment (reset vector).
865 let code_seg = Segment {
866 base: 0xffff0000,
867 limit: 0xffff,
868 selector: 0xf000,
869 type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11
870 present: 1,
871 s: SEG_S_CODE_OR_DATA,
872 ..Default::default()
873 };
874
875 // 16-bit real-mode data segment.
876 let data_seg = Segment {
877 base: 0,
878 limit: 0xffff,
879 selector: 0,
880 type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE | SEG_TYPE_ACCESSED, // 3
881 present: 1,
882 s: SEG_S_CODE_OR_DATA,
883 ..Default::default()
884 };
885
886 // 16-bit TSS segment.
887 let task_seg = Segment {
888 base: 0,
889 limit: 0xffff,
890 selector: 0,
891 type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11
892 present: 1,
893 s: SEG_S_SYSTEM,
894 ..Default::default()
895 };
896
897 // Local descriptor table.
898 let ldt = Segment {
899 base: 0,
900 limit: 0xffff,
901 selector: 0,
902 type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE, // 2
903 present: 1,
904 s: SEG_S_SYSTEM,
905 ..Default::default()
906 };
907
908 // Global descriptor table.
909 let gdt = DescriptorTable {
910 base: 0,
911 limit: 0xffff,
912 };
913
914 // Interrupt descriptor table.
915 let idt = DescriptorTable {
916 base: 0,
917 limit: 0xffff,
918 };
919
920 let cr0 = (1 << 4) // CR0.ET (reserved, always 1)
921 | (1 << 30); // CR0.CD (cache disable)
922
923 Sregs {
924 cs: code_seg,
925 ds: data_seg,
926 es: data_seg,
927 fs: data_seg,
928 gs: data_seg,
929 ss: data_seg,
930 tr: task_seg,
931 ldt,
932 gdt,
933 idt,
934 cr0,
935 cr2: 0,
936 cr3: 0,
937 cr4: 0,
938 cr8: 0,
939 efer: 0,
940 }
941 }
942 }
943
/// State of a VCPU's floating point unit.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Fpu {
    /// x87 FPU data registers ST0-ST7 (16 bytes each).
    pub fpr: [[u8; 16usize]; 8usize],
    /// FPU control word.
    pub fcw: u16,
    /// FPU status word.
    pub fsw: u16,
    /// Abridged FPU tag word.
    pub ftwx: u8,
    pub last_opcode: u16,
    pub last_ip: u64,
    pub last_dp: u64,
    /// XMM registers (16 bytes each).
    pub xmm: [[u8; 16usize]; 16usize],
    /// SSE control/status register.
    pub mxcsr: u32,
}
958
959 impl Default for Fpu {
default() -> Self960 fn default() -> Self {
961 Fpu {
962 fpr: Default::default(),
963 fcw: 0x37f, // Intel SDM Vol. 1, 13.6
964 fsw: 0,
965 ftwx: 0,
966 last_opcode: 0,
967 last_ip: 0,
968 last_dp: 0,
969 xmm: Default::default(),
970 mxcsr: 0x1f80, // Intel SDM Vol. 1, 11.6.4
971 }
972 }
973 }
974
/// State of a VCPU's debug registers.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct DebugRegs {
    /// Address breakpoint registers DR0-DR3.
    pub db: [u64; 4usize],
    /// Debug status register.
    pub dr6: u64,
    /// Debug control register.
    pub dr7: u64,
}

/// The hybrid type for intel hybrid CPU.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum CpuHybridType {
    /// Intel Atom.
    Atom,
    /// Intel Core.
    Core,
}
992
/// State of the VCPU's x87 FPU, MMX, XMM, YMM registers.
/// May contain more state depending on enabled extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Xsave {
    // Backing storage, in u32 words (rounded up from the requested byte length).
    data: Vec<u32>,

    // Actual length in bytes. May be smaller than data if a non-u32 multiple of bytes is
    // requested.
    len: usize,
}
1003
1004 impl Xsave {
1005 /// Create a new buffer to store Xsave data.
1006 ///
1007 /// # Argments
1008 /// * `len` size in bytes.
new(len: usize) -> Self1009 pub fn new(len: usize) -> Self {
1010 Xsave {
1011 data: vec![0; (len + 3) / 4],
1012 len,
1013 }
1014 }
1015
as_ptr(&self) -> *const c_void1016 pub fn as_ptr(&self) -> *const c_void {
1017 self.data.as_ptr() as *const c_void
1018 }
1019
as_mut_ptr(&mut self) -> *mut c_void1020 pub fn as_mut_ptr(&mut self) -> *mut c_void {
1021 self.data.as_mut_ptr() as *mut c_void
1022 }
1023
1024 /// Length in bytes of the XSAVE data.
len(&self) -> usize1025 pub fn len(&self) -> usize {
1026 self.len
1027 }
1028
1029 /// Returns true is length of XSAVE data is zero
is_empty(&self) -> bool1030 pub fn is_empty(&self) -> bool {
1031 self.len() == 0
1032 }
1033 }
1034