1 // Copyright 2020 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 use serde::{Deserialize, Serialize}; 6 7 use base::{error, Result}; 8 use bit_field::*; 9 use downcast_rs::impl_downcast; 10 11 use vm_memory::GuestAddress; 12 13 use crate::{Hypervisor, IrqRoute, IrqSource, IrqSourceChip, Vcpu, Vm}; 14 15 /// A trait for managing cpuids for an x86_64 hypervisor and for checking its capabilities. 16 pub trait HypervisorX86_64: Hypervisor { 17 /// Get the system supported CPUID values. get_supported_cpuid(&self) -> Result<CpuId>18 fn get_supported_cpuid(&self) -> Result<CpuId>; 19 20 /// Get the system emulated CPUID values. get_emulated_cpuid(&self) -> Result<CpuId>21 fn get_emulated_cpuid(&self) -> Result<CpuId>; 22 23 /// Gets the list of supported MSRs. get_msr_index_list(&self) -> Result<Vec<u32>>24 fn get_msr_index_list(&self) -> Result<Vec<u32>>; 25 } 26 27 /// A wrapper for using a VM on x86_64 and getting/setting its state. 28 pub trait VmX86_64: Vm { 29 /// Gets the `HypervisorX86_64` that created this VM. get_hypervisor(&self) -> &dyn HypervisorX86_6430 fn get_hypervisor(&self) -> &dyn HypervisorX86_64; 31 32 /// Create a Vcpu with the specified Vcpu ID. create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>33 fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>; 34 35 /// Sets the address of the three-page region in the VM's address space. set_tss_addr(&self, addr: GuestAddress) -> Result<()>36 fn set_tss_addr(&self, addr: GuestAddress) -> Result<()>; 37 38 /// Sets the address of a one-page region in the VM's address space. set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>39 fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>; 40 } 41 42 /// A wrapper around creating and using a VCPU on x86_64. 43 pub trait VcpuX86_64: Vcpu { 44 /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject 45 /// interrupts into the guest. set_interrupt_window_requested(&self, requested: bool)46 fn set_interrupt_window_requested(&self, requested: bool); 47 48 /// Checks if we can inject an interrupt into the VCPU. ready_for_interrupt(&self) -> bool49 fn ready_for_interrupt(&self) -> bool; 50 51 /// Injects interrupt vector `irq` into the VCPU. interrupt(&self, irq: u32) -> Result<()>52 fn interrupt(&self, irq: u32) -> Result<()>; 53 54 /// Injects a non-maskable interrupt into the VCPU. inject_nmi(&self) -> Result<()>55 fn inject_nmi(&self) -> Result<()>; 56 57 /// Gets the VCPU general purpose registers. get_regs(&self) -> Result<Regs>58 fn get_regs(&self) -> Result<Regs>; 59 60 /// Sets the VCPU general purpose registers. set_regs(&self, regs: &Regs) -> Result<()>61 fn set_regs(&self, regs: &Regs) -> Result<()>; 62 63 /// Gets the VCPU special registers. get_sregs(&self) -> Result<Sregs>64 fn get_sregs(&self) -> Result<Sregs>; 65 66 /// Sets the VCPU special registers. set_sregs(&self, sregs: &Sregs) -> Result<()>67 fn set_sregs(&self, sregs: &Sregs) -> Result<()>; 68 69 /// Gets the VCPU FPU registers. get_fpu(&self) -> Result<Fpu>70 fn get_fpu(&self) -> Result<Fpu>; 71 72 /// Sets the VCPU FPU registers. set_fpu(&self, fpu: &Fpu) -> Result<()>73 fn set_fpu(&self, fpu: &Fpu) -> Result<()>; 74 75 /// Gets the VCPU debug registers. get_debugregs(&self) -> Result<DebugRegs>76 fn get_debugregs(&self) -> Result<DebugRegs>; 77 78 /// Sets the VCPU debug registers. set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>79 fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>; 80 81 /// Gets the VCPU extended control registers. get_xcrs(&self) -> Result<Vec<Register>>82 fn get_xcrs(&self) -> Result<Vec<Register>>; 83 84 /// Sets the VCPU extended control registers. set_xcrs(&self, xcrs: &[Register]) -> Result<()>85 fn set_xcrs(&self, xcrs: &[Register]) -> Result<()>; 86 87 /// Gets the model-specific registers. `msrs` specifies the MSR indexes to be queried, and 88 /// on success contains their indexes and values. get_msrs(&self, msrs: &mut Vec<Register>) -> Result<()>89 fn get_msrs(&self, msrs: &mut Vec<Register>) -> Result<()>; 90 91 /// Sets the model-specific registers. set_msrs(&self, msrs: &[Register]) -> Result<()>92 fn set_msrs(&self, msrs: &[Register]) -> Result<()>; 93 94 /// Sets up the data returned by the CPUID instruction. set_cpuid(&self, cpuid: &CpuId) -> Result<()>95 fn set_cpuid(&self, cpuid: &CpuId) -> Result<()>; 96 97 /// Gets the system emulated hyper-v CPUID values. get_hyperv_cpuid(&self) -> Result<CpuId>98 fn get_hyperv_cpuid(&self) -> Result<CpuId>; 99 100 /// Sets up debug registers and configure vcpu for handling guest debug events. set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>101 fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>; 102 } 103 104 impl_downcast!(VcpuX86_64); 105 106 /// A CpuId Entry contains supported feature information for the given processor. 107 /// This can be modified by the hypervisor to pass additional information to the guest kernel 108 /// about the hypervisor or vm. Information is returned in the eax, ebx, ecx and edx registers 109 /// by the cpu for a given function and index/subfunction (passed into the cpu via the eax and ecx 110 /// register respectively). 111 #[repr(C)] 112 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] 113 pub struct CpuIdEntry { 114 pub function: u32, 115 pub index: u32, 116 // flags is needed for KVM. We store it on CpuIdEntry to preserve the flags across 117 // get_supported_cpuids() -> kvm_cpuid2 -> CpuId -> kvm_cpuid2 -> set_cpuid(). 118 pub flags: u32, 119 pub eax: u32, 120 pub ebx: u32, 121 pub ecx: u32, 122 pub edx: u32, 123 } 124 125 /// A container for the list of cpu id entries for the hypervisor and underlying cpu. 126 pub struct CpuId { 127 pub cpu_id_entries: Vec<CpuIdEntry>, 128 } 129 130 impl CpuId { 131 /// Constructs a new CpuId, with space allocated for `initial_capacity` CpuIdEntries. new(initial_capacity: usize) -> Self132 pub fn new(initial_capacity: usize) -> Self { 133 CpuId { 134 cpu_id_entries: Vec::with_capacity(initial_capacity), 135 } 136 } 137 } 138 139 #[bitfield] 140 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 141 pub enum DestinationMode { 142 Physical = 0, 143 Logical = 1, 144 } 145 146 #[bitfield] 147 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 148 pub enum TriggerMode { 149 Edge = 0, 150 Level = 1, 151 } 152 153 #[bitfield] 154 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 155 pub enum DeliveryMode { 156 Fixed = 0b000, 157 Lowest = 0b001, 158 SMI = 0b010, // System management interrupt 159 RemoteRead = 0b011, // This is no longer supported by intel. 160 NMI = 0b100, // Non maskable interrupt 161 Init = 0b101, 162 Startup = 0b110, 163 External = 0b111, 164 } 165 166 // These MSI structures are for Intel's implementation of MSI. The PCI spec defines most of MSI, 167 // but the Intel spec defines the format of messages for raising interrupts. The PCI spec defines 168 // three u32s -- the address, address_high, and data -- but Intel only makes use of the address and 169 // data. The Intel portion of the specification is in Volume 3 section 10.11. 170 #[bitfield] 171 #[derive(Clone, Copy, PartialEq, Eq)] 172 pub struct MsiAddressMessage { 173 pub reserved: BitField2, 174 #[bits = 1] 175 pub destination_mode: DestinationMode, 176 pub redirection_hint: BitField1, 177 pub reserved_2: BitField8, 178 pub destination_id: BitField8, 179 // According to Intel's implementation of MSI, these bits must always be 0xfee. 180 pub always_0xfee: BitField12, 181 } 182 183 #[bitfield] 184 #[derive(Clone, Copy, PartialEq, Eq)] 185 pub struct MsiDataMessage { 186 pub vector: BitField8, 187 #[bits = 3] 188 pub delivery_mode: DeliveryMode, 189 pub reserved: BitField3, 190 #[bits = 1] 191 pub level: Level, 192 #[bits = 1] 193 pub trigger: TriggerMode, 194 pub reserved2: BitField16, 195 } 196 197 #[bitfield] 198 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 199 pub enum DeliveryStatus { 200 Idle = 0, 201 Pending = 1, 202 } 203 204 /// The level of a level-triggered interrupt: asserted or deasserted. 205 #[bitfield] 206 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 207 pub enum Level { 208 Deassert = 0, 209 Assert = 1, 210 } 211 212 /// Represents a IOAPIC redirection table entry. 213 #[bitfield] 214 #[derive(Clone, Copy, Default, PartialEq, Eq)] 215 pub struct IoapicRedirectionTableEntry { 216 vector: BitField8, 217 #[bits = 3] 218 delivery_mode: DeliveryMode, 219 #[bits = 1] 220 dest_mode: DestinationMode, 221 #[bits = 1] 222 delivery_status: DeliveryStatus, 223 polarity: BitField1, 224 remote_irr: bool, 225 #[bits = 1] 226 trigger_mode: TriggerMode, 227 interrupt_mask: bool, // true iff interrupts are masked. 228 reserved: BitField39, 229 dest_id: BitField8, 230 } 231 232 /// Number of pins on the IOAPIC. 233 pub const NUM_IOAPIC_PINS: usize = 24; 234 235 /// Represents the state of the IOAPIC. 236 #[repr(C)] 237 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] 238 pub struct IoapicState { 239 /// base_address is the memory base address for this IOAPIC. It cannot be changed. 240 pub base_address: u64, 241 /// ioregsel register. Used for selecting which entry of the redirect table to read/write. 242 pub ioregsel: u8, 243 /// ioapicid register. Bits 24 - 27 contain the APIC ID for this device. 244 pub ioapicid: u32, 245 /// current_interrupt_level_bitmap represents a bitmap of the state of all of the irq lines 246 pub current_interrupt_level_bitmap: u32, 247 /// redirect_table contains the irq settings for each irq line 248 pub redirect_table: [IoapicRedirectionTableEntry; 24], 249 } 250 251 #[repr(C)] 252 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 253 pub enum PicSelect { 254 Primary = 0, 255 Secondary = 1, 256 } 257 258 #[repr(C)] 259 #[derive(enumn::N, Debug, Clone, Copy, PartialEq, Eq)] 260 pub enum PicInitState { 261 Icw1 = 0, 262 Icw2 = 1, 263 Icw3 = 2, 264 Icw4 = 3, 265 } 266 267 /// Convenience implementation for converting from a u8 268 impl From<u8> for PicInitState { from(item: u8) -> Self269 fn from(item: u8) -> Self { 270 PicInitState::n(item).unwrap_or_else(|| { 271 error!("Invalid PicInitState {}, setting to 0", item); 272 PicInitState::Icw1 273 }) 274 } 275 } 276 277 impl Default for PicInitState { default() -> Self278 fn default() -> Self { 279 PicInitState::Icw1 280 } 281 } 282 283 /// Represents the state of the PIC. 284 #[repr(C)] 285 #[derive(Clone, Copy, Default, Debug, PartialEq, Eq)] 286 pub struct PicState { 287 /// Edge detection. 288 pub last_irr: u8, 289 /// Interrupt Request Register. 290 pub irr: u8, 291 /// Interrupt Mask Register. 292 pub imr: u8, 293 /// Interrupt Service Register. 294 pub isr: u8, 295 /// Highest priority, for priority rotation. 296 pub priority_add: u8, 297 pub irq_base: u8, 298 pub read_reg_select: bool, 299 pub poll: bool, 300 pub special_mask: bool, 301 pub init_state: PicInitState, 302 pub auto_eoi: bool, 303 pub rotate_on_auto_eoi: bool, 304 pub special_fully_nested_mode: bool, 305 /// PIC takes either 3 or 4 bytes of initialization command word during 306 /// initialization. use_4_byte_icw is true if 4 bytes of ICW are needed. 307 pub use_4_byte_icw: bool, 308 /// "Edge/Level Control Registers", for edge trigger selection. 309 /// When a particular bit is set, the corresponding IRQ is in level-triggered mode. Otherwise it 310 /// is in edge-triggered mode. 311 pub elcr: u8, 312 pub elcr_mask: u8, 313 } 314 315 /// The LapicState represents the state of an x86 CPU's Local APIC. 316 /// The Local APIC consists of 64 128-bit registers, but only the first 32-bits of each register 317 /// can be used, so this structure only stores the first 32-bits of each register. 318 #[repr(C)] 319 #[derive(Clone, Copy)] 320 pub struct LapicState { 321 pub regs: [LapicRegister; 64], 322 } 323 324 pub type LapicRegister = u32; 325 326 // rust arrays longer than 32 need custom implementations of Debug 327 impl std::fmt::Debug for LapicState { fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result328 fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { 329 self.regs[..].fmt(formatter) 330 } 331 } 332 333 // rust arrays longer than 32 need custom implementations of PartialEq 334 impl PartialEq for LapicState { eq(&self, other: &LapicState) -> bool335 fn eq(&self, other: &LapicState) -> bool { 336 self.regs[..] == other.regs[..] 337 } 338 } 339 340 // Lapic equality is reflexive, so we impl Eq 341 impl Eq for LapicState {} 342 343 /// The PitState represents the state of the PIT (aka the Programmable Interval Timer). 344 /// The state is simply the state of it's three channels. 345 #[repr(C)] 346 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 347 pub struct PitState { 348 pub channels: [PitChannelState; 3], 349 /// Hypervisor-specific flags for setting the pit state. 350 pub flags: u32, 351 } 352 353 /// The PitRWMode enum represents the access mode of a PIT channel. 354 /// Reads and writes to the Pit happen over Port-mapped I/O, which happens one byte at a time, 355 /// but the count values and latch values are two bytes. So the access mode controls which of the 356 /// two bytes will be read when. 357 #[repr(C)] 358 #[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq)] 359 pub enum PitRWMode { 360 /// None mode means that no access mode has been set. 361 None = 0, 362 /// Least mode means all reads/writes will read/write the least significant byte. 363 Least = 1, 364 /// Most mode means all reads/writes will read/write the most significant byte. 365 Most = 2, 366 /// Both mode means first the least significant byte will be read/written, then the 367 /// next read/write will read/write the most significant byte. 368 Both = 3, 369 } 370 371 /// Convenience implementation for converting from a u8 372 impl From<u8> for PitRWMode { from(item: u8) -> Self373 fn from(item: u8) -> Self { 374 PitRWMode::n(item).unwrap_or_else(|| { 375 error!("Invalid PitRWMode value {}, setting to 0", item); 376 PitRWMode::None 377 }) 378 } 379 } 380 381 /// The PitRWState enum represents the state of reading to or writing from a channel. 382 /// This is related to the PitRWMode, it mainly gives more detail about the state of the channel 383 /// with respect to PitRWMode::Both. 384 #[repr(C)] 385 #[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq)] 386 pub enum PitRWState { 387 /// None mode means that no access mode has been set. 388 None = 0, 389 /// LSB means that the channel is in PitRWMode::Least access mode. 390 LSB = 1, 391 /// MSB means that the channel is in PitRWMode::Most access mode. 392 MSB = 2, 393 /// Word0 means that the channel is in PitRWMode::Both mode, and the least sginificant byte 394 /// has not been read/written yet. 395 Word0 = 3, 396 /// Word1 means that the channel is in PitRWMode::Both mode and the least significant byte 397 /// has already been read/written, and the next byte to be read/written will be the most 398 /// significant byte. 399 Word1 = 4, 400 } 401 402 /// Convenience implementation for converting from a u8 403 impl From<u8> for PitRWState { from(item: u8) -> Self404 fn from(item: u8) -> Self { 405 PitRWState::n(item).unwrap_or_else(|| { 406 error!("Invalid PitRWState value {}, setting to 0", item); 407 PitRWState::None 408 }) 409 } 410 } 411 412 /// The PitChannelState represents the state of one of the PIT's three counters. 413 #[repr(C)] 414 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 415 pub struct PitChannelState { 416 /// The starting value for the counter. 417 pub count: u32, 418 /// Stores the channel count from the last time the count was latched. 419 pub latched_count: u16, 420 /// Indicates the PitRWState state of reading the latch value. 421 pub count_latched: PitRWState, 422 /// Indicates whether ReadBack status has been latched. 423 pub status_latched: bool, 424 /// Stores the channel status from the last time the status was latched. The status contains 425 /// information about the access mode of this channel, but changing those bits in the status 426 /// will not change the behavior of the pit. 427 pub status: u8, 428 /// Indicates the PitRWState state of reading the counter. 429 pub read_state: PitRWState, 430 /// Indicates the PitRWState state of writing the counter. 431 pub write_state: PitRWState, 432 /// Stores the value with which the counter was initialized. Counters are 16- 433 /// bit values with an effective range of 1-65536 (65536 represented by 0). 434 pub reload_value: u16, 435 /// The command access mode of this channel. 436 pub rw_mode: PitRWMode, 437 /// The operation mode of this channel. 438 pub mode: u8, 439 /// Whether or not we are in bcd mode. Not supported by KVM or crosvm's PIT implementation. 440 pub bcd: bool, 441 /// Value of the gate input pin. This only applies to channel 2. 442 pub gate: bool, 443 /// Nanosecond timestamp of when the count value was loaded. 444 pub count_load_time: u64, 445 } 446 447 // Convenience constructors for IrqRoutes 448 impl IrqRoute { ioapic_irq_route(irq_num: u32) -> IrqRoute449 pub fn ioapic_irq_route(irq_num: u32) -> IrqRoute { 450 IrqRoute { 451 gsi: irq_num, 452 source: IrqSource::Irqchip { 453 chip: IrqSourceChip::Ioapic, 454 pin: irq_num, 455 }, 456 } 457 } 458 pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute459 pub fn pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute { 460 IrqRoute { 461 gsi: irq_num, 462 source: IrqSource::Irqchip { 463 chip: id, 464 pin: irq_num % 8, 465 }, 466 } 467 } 468 } 469 470 /// State of a VCPU's general purpose registers. 471 #[repr(C)] 472 #[derive(Debug, Default, Copy, Clone)] 473 pub struct Regs { 474 pub rax: u64, 475 pub rbx: u64, 476 pub rcx: u64, 477 pub rdx: u64, 478 pub rsi: u64, 479 pub rdi: u64, 480 pub rsp: u64, 481 pub rbp: u64, 482 pub r8: u64, 483 pub r9: u64, 484 pub r10: u64, 485 pub r11: u64, 486 pub r12: u64, 487 pub r13: u64, 488 pub r14: u64, 489 pub r15: u64, 490 pub rip: u64, 491 pub rflags: u64, 492 } 493 494 /// State of a memory segment. 495 #[repr(C)] 496 #[derive(Debug, Default, Copy, Clone)] 497 pub struct Segment { 498 pub base: u64, 499 pub limit: u32, 500 pub selector: u16, 501 pub type_: u8, 502 pub present: u8, 503 pub dpl: u8, 504 pub db: u8, 505 pub s: u8, 506 pub l: u8, 507 pub g: u8, 508 pub avl: u8, 509 } 510 511 /// State of a global descriptor table or interrupt descriptor table. 512 #[repr(C)] 513 #[derive(Debug, Default, Copy, Clone)] 514 pub struct DescriptorTable { 515 pub base: u64, 516 pub limit: u16, 517 } 518 519 /// State of a VCPU's special registers. 520 #[repr(C)] 521 #[derive(Debug, Default, Copy, Clone)] 522 pub struct Sregs { 523 pub cs: Segment, 524 pub ds: Segment, 525 pub es: Segment, 526 pub fs: Segment, 527 pub gs: Segment, 528 pub ss: Segment, 529 pub tr: Segment, 530 pub ldt: Segment, 531 pub gdt: DescriptorTable, 532 pub idt: DescriptorTable, 533 pub cr0: u64, 534 pub cr2: u64, 535 pub cr3: u64, 536 pub cr4: u64, 537 pub cr8: u64, 538 pub efer: u64, 539 pub apic_base: u64, 540 541 /// A bitmap of pending external interrupts. At most one bit may be set. This interrupt has 542 /// been acknowledged by the APIC but not yet injected into the cpu core. 543 pub interrupt_bitmap: [u64; 4usize], 544 } 545 546 /// State of a VCPU's floating point unit. 547 #[repr(C)] 548 #[derive(Debug, Default, Copy, Clone)] 549 pub struct Fpu { 550 pub fpr: [[u8; 16usize]; 8usize], 551 pub fcw: u16, 552 pub fsw: u16, 553 pub ftwx: u8, 554 pub last_opcode: u16, 555 pub last_ip: u64, 556 pub last_dp: u64, 557 pub xmm: [[u8; 16usize]; 16usize], 558 pub mxcsr: u32, 559 } 560 561 /// State of a VCPU's debug registers. 562 #[repr(C)] 563 #[derive(Debug, Default, Copy, Clone)] 564 pub struct DebugRegs { 565 pub db: [u64; 4usize], 566 pub dr6: u64, 567 pub dr7: u64, 568 } 569 570 /// State of one VCPU register. Currently used for MSRs and XCRs. 571 #[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)] 572 pub struct Register { 573 pub id: u32, 574 pub value: u64, 575 } 576