1 // Copyright 2020 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 use serde::{Deserialize, Serialize}; 6 7 use base::{error, Result}; 8 use bit_field::*; 9 use downcast_rs::impl_downcast; 10 11 use vm_memory::GuestAddress; 12 13 use crate::{Hypervisor, IrqRoute, IrqSource, IrqSourceChip, Vcpu, Vm}; 14 15 /// A trait for managing cpuids for an x86_64 hypervisor and for checking its capabilities. 16 pub trait HypervisorX86_64: Hypervisor { 17 /// Get the system supported CPUID values. get_supported_cpuid(&self) -> Result<CpuId>18 fn get_supported_cpuid(&self) -> Result<CpuId>; 19 20 /// Get the system emulated CPUID values. get_emulated_cpuid(&self) -> Result<CpuId>21 fn get_emulated_cpuid(&self) -> Result<CpuId>; 22 23 /// Gets the list of supported MSRs. get_msr_index_list(&self) -> Result<Vec<u32>>24 fn get_msr_index_list(&self) -> Result<Vec<u32>>; 25 } 26 27 /// A wrapper for using a VM on x86_64 and getting/setting its state. 28 pub trait VmX86_64: Vm { 29 /// Gets the `HypervisorX86_64` that created this VM. get_hypervisor(&self) -> &dyn HypervisorX86_6430 fn get_hypervisor(&self) -> &dyn HypervisorX86_64; 31 32 /// Create a Vcpu with the specified Vcpu ID. create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>33 fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>; 34 35 /// Sets the address of the three-page region in the VM's address space. set_tss_addr(&self, addr: GuestAddress) -> Result<()>36 fn set_tss_addr(&self, addr: GuestAddress) -> Result<()>; 37 38 /// Sets the address of a one-page region in the VM's address space. set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>39 fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>; 40 } 41 42 /// A wrapper around creating and using a VCPU on x86_64. 43 pub trait VcpuX86_64: Vcpu { 44 /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject 45 /// interrupts into the guest. set_interrupt_window_requested(&self, requested: bool)46 fn set_interrupt_window_requested(&self, requested: bool); 47 48 /// Checks if we can inject an interrupt into the VCPU. ready_for_interrupt(&self) -> bool49 fn ready_for_interrupt(&self) -> bool; 50 51 /// Injects interrupt vector `irq` into the VCPU. interrupt(&self, irq: u32) -> Result<()>52 fn interrupt(&self, irq: u32) -> Result<()>; 53 54 /// Injects a non-maskable interrupt into the VCPU. inject_nmi(&self) -> Result<()>55 fn inject_nmi(&self) -> Result<()>; 56 57 /// Gets the VCPU general purpose registers. get_regs(&self) -> Result<Regs>58 fn get_regs(&self) -> Result<Regs>; 59 60 /// Sets the VCPU general purpose registers. set_regs(&self, regs: &Regs) -> Result<()>61 fn set_regs(&self, regs: &Regs) -> Result<()>; 62 63 /// Gets the VCPU special registers. get_sregs(&self) -> Result<Sregs>64 fn get_sregs(&self) -> Result<Sregs>; 65 66 /// Sets the VCPU special registers. set_sregs(&self, sregs: &Sregs) -> Result<()>67 fn set_sregs(&self, sregs: &Sregs) -> Result<()>; 68 69 /// Gets the VCPU FPU registers. get_fpu(&self) -> Result<Fpu>70 fn get_fpu(&self) -> Result<Fpu>; 71 72 /// Sets the VCPU FPU registers. set_fpu(&self, fpu: &Fpu) -> Result<()>73 fn set_fpu(&self, fpu: &Fpu) -> Result<()>; 74 75 /// Gets the VCPU debug registers. get_debugregs(&self) -> Result<DebugRegs>76 fn get_debugregs(&self) -> Result<DebugRegs>; 77 78 /// Sets the VCPU debug registers. set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>79 fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>; 80 81 /// Gets the VCPU extended control registers. get_xcrs(&self) -> Result<Vec<Register>>82 fn get_xcrs(&self) -> Result<Vec<Register>>; 83 84 /// Sets the VCPU extended control registers. set_xcrs(&self, xcrs: &[Register]) -> Result<()>85 fn set_xcrs(&self, xcrs: &[Register]) -> Result<()>; 86 87 /// Gets the model-specific registers. `msrs` specifies the MSR indexes to be queried, and 88 /// on success contains their indexes and values. get_msrs(&self, msrs: &mut Vec<Register>) -> Result<()>89 fn get_msrs(&self, msrs: &mut Vec<Register>) -> Result<()>; 90 91 /// Sets the model-specific registers. set_msrs(&self, msrs: &[Register]) -> Result<()>92 fn set_msrs(&self, msrs: &[Register]) -> Result<()>; 93 94 /// Sets up the data returned by the CPUID instruction. set_cpuid(&self, cpuid: &CpuId) -> Result<()>95 fn set_cpuid(&self, cpuid: &CpuId) -> Result<()>; 96 97 /// Gets the system emulated hyper-v CPUID values. get_hyperv_cpuid(&self) -> Result<CpuId>98 fn get_hyperv_cpuid(&self) -> Result<CpuId>; 99 100 /// Sets up debug registers and configure vcpu for handling guest debug events. set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>101 fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>; 102 } 103 104 impl_downcast!(VcpuX86_64); 105 106 /// A CpuId Entry contains supported feature information for the given processor. 107 /// This can be modified by the hypervisor to pass additional information to the guest kernel 108 /// about the hypervisor or vm. Information is returned in the eax, ebx, ecx and edx registers 109 /// by the cpu for a given function and index/subfunction (passed into the cpu via the eax and ecx 110 /// register respectively). 111 #[repr(C)] 112 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] 113 pub struct CpuIdEntry { 114 pub function: u32, 115 pub index: u32, 116 // flags is needed for KVM. We store it on CpuIdEntry to preserve the flags across 117 // get_supported_cpuids() -> kvm_cpuid2 -> CpuId -> kvm_cpuid2 -> set_cpuid(). 118 pub flags: u32, 119 pub eax: u32, 120 pub ebx: u32, 121 pub ecx: u32, 122 pub edx: u32, 123 } 124 125 /// A container for the list of cpu id entries for the hypervisor and underlying cpu. 126 pub struct CpuId { 127 pub cpu_id_entries: Vec<CpuIdEntry>, 128 } 129 130 impl CpuId { 131 /// Constructs a new CpuId, with space allocated for `initial_capacity` CpuIdEntries. new(initial_capacity: usize) -> Self132 pub fn new(initial_capacity: usize) -> Self { 133 CpuId { 134 cpu_id_entries: Vec::with_capacity(initial_capacity), 135 } 136 } 137 } 138 139 #[bitfield] 140 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 141 pub enum DestinationMode { 142 Physical = 0, 143 Logical = 1, 144 } 145 146 #[bitfield] 147 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 148 pub enum TriggerMode { 149 Edge = 0, 150 Level = 1, 151 } 152 153 #[bitfield] 154 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 155 pub enum DeliveryMode { 156 Fixed = 0b000, 157 Lowest = 0b001, 158 SMI = 0b010, // System management interrupt 159 RemoteRead = 0b011, // This is no longer supported by intel. 160 NMI = 0b100, // Non maskable interrupt 161 Init = 0b101, 162 Startup = 0b110, 163 External = 0b111, 164 } 165 166 // These MSI structures are for Intel's implementation of MSI. The PCI spec defines most of MSI, 167 // but the Intel spec defines the format of messages for raising interrupts. The PCI spec defines 168 // three u32s -- the address, address_high, and data -- but Intel only makes use of the address and 169 // data. The Intel portion of the specification is in Volume 3 section 10.11. 170 #[bitfield] 171 #[derive(Clone, Copy, PartialEq, Eq)] 172 pub struct MsiAddressMessage { 173 pub reserved: BitField2, 174 #[bits = 1] 175 pub destination_mode: DestinationMode, 176 pub redirection_hint: BitField1, 177 pub reserved_2: BitField8, 178 pub destination_id: BitField8, 179 // According to Intel's implementation of MSI, these bits must always be 0xfee. 180 pub always_0xfee: BitField12, 181 } 182 183 #[bitfield] 184 #[derive(Clone, Copy, PartialEq, Eq)] 185 pub struct MsiDataMessage { 186 pub vector: BitField8, 187 #[bits = 3] 188 pub delivery_mode: DeliveryMode, 189 pub reserved: BitField3, 190 #[bits = 1] 191 pub level: Level, 192 #[bits = 1] 193 pub trigger: TriggerMode, 194 pub reserved2: BitField16, 195 } 196 197 #[bitfield] 198 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 199 pub enum DeliveryStatus { 200 Idle = 0, 201 Pending = 1, 202 } 203 204 /// The level of a level-triggered interrupt: asserted or deasserted. 205 #[bitfield] 206 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 207 pub enum Level { 208 Deassert = 0, 209 Assert = 1, 210 } 211 212 /// Represents a IOAPIC redirection table entry. 213 #[bitfield] 214 #[derive(Clone, Copy, Default, PartialEq, Eq)] 215 pub struct IoapicRedirectionTableEntry { 216 vector: BitField8, 217 #[bits = 3] 218 delivery_mode: DeliveryMode, 219 #[bits = 1] 220 dest_mode: DestinationMode, 221 #[bits = 1] 222 delivery_status: DeliveryStatus, 223 polarity: BitField1, 224 remote_irr: bool, 225 #[bits = 1] 226 trigger_mode: TriggerMode, 227 interrupt_mask: bool, // true iff interrupts are masked. 228 reserved: BitField39, 229 dest_id: BitField8, 230 } 231 232 /// Number of pins on the standard KVM/IOAPIC. 233 pub const NUM_IOAPIC_PINS: usize = 24; 234 235 /// Maximum number of pins on the IOAPIC. 236 pub const MAX_IOAPIC_PINS: usize = 120; 237 238 /// Represents the state of the IOAPIC. 239 #[repr(C)] 240 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 241 pub struct IoapicState { 242 /// base_address is the memory base address for this IOAPIC. It cannot be changed. 243 pub base_address: u64, 244 /// ioregsel register. Used for selecting which entry of the redirect table to read/write. 245 pub ioregsel: u8, 246 /// ioapicid register. Bits 24 - 27 contain the APIC ID for this device. 247 pub ioapicid: u32, 248 /// current_interrupt_level_bitmap represents a bitmap of the state of all of the irq lines 249 pub current_interrupt_level_bitmap: u32, 250 /// redirect_table contains the irq settings for each irq line 251 pub redirect_table: [IoapicRedirectionTableEntry; 120], 252 } 253 254 impl Default for IoapicState { default() -> IoapicState255 fn default() -> IoapicState { 256 unsafe { std::mem::zeroed() } 257 } 258 } 259 260 #[repr(C)] 261 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 262 pub enum PicSelect { 263 Primary = 0, 264 Secondary = 1, 265 } 266 267 #[repr(C)] 268 #[derive(enumn::N, Debug, Clone, Copy, PartialEq, Eq)] 269 pub enum PicInitState { 270 Icw1 = 0, 271 Icw2 = 1, 272 Icw3 = 2, 273 Icw4 = 3, 274 } 275 276 /// Convenience implementation for converting from a u8 277 impl From<u8> for PicInitState { from(item: u8) -> Self278 fn from(item: u8) -> Self { 279 PicInitState::n(item).unwrap_or_else(|| { 280 error!("Invalid PicInitState {}, setting to 0", item); 281 PicInitState::Icw1 282 }) 283 } 284 } 285 286 impl Default for PicInitState { default() -> Self287 fn default() -> Self { 288 PicInitState::Icw1 289 } 290 } 291 292 /// Represents the state of the PIC. 293 #[repr(C)] 294 #[derive(Clone, Copy, Default, Debug, PartialEq, Eq)] 295 pub struct PicState { 296 /// Edge detection. 297 pub last_irr: u8, 298 /// Interrupt Request Register. 299 pub irr: u8, 300 /// Interrupt Mask Register. 301 pub imr: u8, 302 /// Interrupt Service Register. 303 pub isr: u8, 304 /// Highest priority, for priority rotation. 305 pub priority_add: u8, 306 pub irq_base: u8, 307 pub read_reg_select: bool, 308 pub poll: bool, 309 pub special_mask: bool, 310 pub init_state: PicInitState, 311 pub auto_eoi: bool, 312 pub rotate_on_auto_eoi: bool, 313 pub special_fully_nested_mode: bool, 314 /// PIC takes either 3 or 4 bytes of initialization command word during 315 /// initialization. use_4_byte_icw is true if 4 bytes of ICW are needed. 316 pub use_4_byte_icw: bool, 317 /// "Edge/Level Control Registers", for edge trigger selection. 318 /// When a particular bit is set, the corresponding IRQ is in level-triggered mode. Otherwise it 319 /// is in edge-triggered mode. 320 pub elcr: u8, 321 pub elcr_mask: u8, 322 } 323 324 /// The LapicState represents the state of an x86 CPU's Local APIC. 325 /// The Local APIC consists of 64 128-bit registers, but only the first 32-bits of each register 326 /// can be used, so this structure only stores the first 32-bits of each register. 327 #[repr(C)] 328 #[derive(Clone, Copy)] 329 pub struct LapicState { 330 pub regs: [LapicRegister; 64], 331 } 332 333 pub type LapicRegister = u32; 334 335 // rust arrays longer than 32 need custom implementations of Debug 336 impl std::fmt::Debug for LapicState { fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result337 fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { 338 self.regs[..].fmt(formatter) 339 } 340 } 341 342 // rust arrays longer than 32 need custom implementations of PartialEq 343 impl PartialEq for LapicState { eq(&self, other: &LapicState) -> bool344 fn eq(&self, other: &LapicState) -> bool { 345 self.regs[..] == other.regs[..] 346 } 347 } 348 349 // Lapic equality is reflexive, so we impl Eq 350 impl Eq for LapicState {} 351 352 /// The PitState represents the state of the PIT (aka the Programmable Interval Timer). 353 /// The state is simply the state of it's three channels. 354 #[repr(C)] 355 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 356 pub struct PitState { 357 pub channels: [PitChannelState; 3], 358 /// Hypervisor-specific flags for setting the pit state. 359 pub flags: u32, 360 } 361 362 /// The PitRWMode enum represents the access mode of a PIT channel. 363 /// Reads and writes to the Pit happen over Port-mapped I/O, which happens one byte at a time, 364 /// but the count values and latch values are two bytes. So the access mode controls which of the 365 /// two bytes will be read when. 366 #[repr(C)] 367 #[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq)] 368 pub enum PitRWMode { 369 /// None mode means that no access mode has been set. 370 None = 0, 371 /// Least mode means all reads/writes will read/write the least significant byte. 372 Least = 1, 373 /// Most mode means all reads/writes will read/write the most significant byte. 374 Most = 2, 375 /// Both mode means first the least significant byte will be read/written, then the 376 /// next read/write will read/write the most significant byte. 377 Both = 3, 378 } 379 380 /// Convenience implementation for converting from a u8 381 impl From<u8> for PitRWMode { from(item: u8) -> Self382 fn from(item: u8) -> Self { 383 PitRWMode::n(item).unwrap_or_else(|| { 384 error!("Invalid PitRWMode value {}, setting to 0", item); 385 PitRWMode::None 386 }) 387 } 388 } 389 390 /// The PitRWState enum represents the state of reading to or writing from a channel. 391 /// This is related to the PitRWMode, it mainly gives more detail about the state of the channel 392 /// with respect to PitRWMode::Both. 393 #[repr(C)] 394 #[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq)] 395 pub enum PitRWState { 396 /// None mode means that no access mode has been set. 397 None = 0, 398 /// LSB means that the channel is in PitRWMode::Least access mode. 399 LSB = 1, 400 /// MSB means that the channel is in PitRWMode::Most access mode. 401 MSB = 2, 402 /// Word0 means that the channel is in PitRWMode::Both mode, and the least sginificant byte 403 /// has not been read/written yet. 404 Word0 = 3, 405 /// Word1 means that the channel is in PitRWMode::Both mode and the least significant byte 406 /// has already been read/written, and the next byte to be read/written will be the most 407 /// significant byte. 408 Word1 = 4, 409 } 410 411 /// Convenience implementation for converting from a u8 412 impl From<u8> for PitRWState { from(item: u8) -> Self413 fn from(item: u8) -> Self { 414 PitRWState::n(item).unwrap_or_else(|| { 415 error!("Invalid PitRWState value {}, setting to 0", item); 416 PitRWState::None 417 }) 418 } 419 } 420 421 /// The PitChannelState represents the state of one of the PIT's three counters. 422 #[repr(C)] 423 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 424 pub struct PitChannelState { 425 /// The starting value for the counter. 426 pub count: u32, 427 /// Stores the channel count from the last time the count was latched. 428 pub latched_count: u16, 429 /// Indicates the PitRWState state of reading the latch value. 430 pub count_latched: PitRWState, 431 /// Indicates whether ReadBack status has been latched. 432 pub status_latched: bool, 433 /// Stores the channel status from the last time the status was latched. The status contains 434 /// information about the access mode of this channel, but changing those bits in the status 435 /// will not change the behavior of the pit. 436 pub status: u8, 437 /// Indicates the PitRWState state of reading the counter. 438 pub read_state: PitRWState, 439 /// Indicates the PitRWState state of writing the counter. 440 pub write_state: PitRWState, 441 /// Stores the value with which the counter was initialized. Counters are 16- 442 /// bit values with an effective range of 1-65536 (65536 represented by 0). 443 pub reload_value: u16, 444 /// The command access mode of this channel. 445 pub rw_mode: PitRWMode, 446 /// The operation mode of this channel. 447 pub mode: u8, 448 /// Whether or not we are in bcd mode. Not supported by KVM or crosvm's PIT implementation. 449 pub bcd: bool, 450 /// Value of the gate input pin. This only applies to channel 2. 451 pub gate: bool, 452 /// Nanosecond timestamp of when the count value was loaded. 453 pub count_load_time: u64, 454 } 455 456 // Convenience constructors for IrqRoutes 457 impl IrqRoute { ioapic_irq_route(irq_num: u32) -> IrqRoute458 pub fn ioapic_irq_route(irq_num: u32) -> IrqRoute { 459 IrqRoute { 460 gsi: irq_num, 461 source: IrqSource::Irqchip { 462 chip: IrqSourceChip::Ioapic, 463 pin: irq_num, 464 }, 465 } 466 } 467 pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute468 pub fn pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute { 469 IrqRoute { 470 gsi: irq_num, 471 source: IrqSource::Irqchip { 472 chip: id, 473 pin: irq_num % 8, 474 }, 475 } 476 } 477 } 478 479 /// State of a VCPU's general purpose registers. 480 #[repr(C)] 481 #[derive(Debug, Default, Copy, Clone)] 482 pub struct Regs { 483 pub rax: u64, 484 pub rbx: u64, 485 pub rcx: u64, 486 pub rdx: u64, 487 pub rsi: u64, 488 pub rdi: u64, 489 pub rsp: u64, 490 pub rbp: u64, 491 pub r8: u64, 492 pub r9: u64, 493 pub r10: u64, 494 pub r11: u64, 495 pub r12: u64, 496 pub r13: u64, 497 pub r14: u64, 498 pub r15: u64, 499 pub rip: u64, 500 pub rflags: u64, 501 } 502 503 /// State of a memory segment. 504 #[repr(C)] 505 #[derive(Debug, Default, Copy, Clone)] 506 pub struct Segment { 507 pub base: u64, 508 pub limit: u32, 509 pub selector: u16, 510 pub type_: u8, 511 pub present: u8, 512 pub dpl: u8, 513 pub db: u8, 514 pub s: u8, 515 pub l: u8, 516 pub g: u8, 517 pub avl: u8, 518 } 519 520 /// State of a global descriptor table or interrupt descriptor table. 521 #[repr(C)] 522 #[derive(Debug, Default, Copy, Clone)] 523 pub struct DescriptorTable { 524 pub base: u64, 525 pub limit: u16, 526 } 527 528 /// State of a VCPU's special registers. 529 #[repr(C)] 530 #[derive(Debug, Default, Copy, Clone)] 531 pub struct Sregs { 532 pub cs: Segment, 533 pub ds: Segment, 534 pub es: Segment, 535 pub fs: Segment, 536 pub gs: Segment, 537 pub ss: Segment, 538 pub tr: Segment, 539 pub ldt: Segment, 540 pub gdt: DescriptorTable, 541 pub idt: DescriptorTable, 542 pub cr0: u64, 543 pub cr2: u64, 544 pub cr3: u64, 545 pub cr4: u64, 546 pub cr8: u64, 547 pub efer: u64, 548 pub apic_base: u64, 549 550 /// A bitmap of pending external interrupts. At most one bit may be set. This interrupt has 551 /// been acknowledged by the APIC but not yet injected into the cpu core. 552 pub interrupt_bitmap: [u64; 4usize], 553 } 554 555 /// State of a VCPU's floating point unit. 556 #[repr(C)] 557 #[derive(Debug, Default, Copy, Clone)] 558 pub struct Fpu { 559 pub fpr: [[u8; 16usize]; 8usize], 560 pub fcw: u16, 561 pub fsw: u16, 562 pub ftwx: u8, 563 pub last_opcode: u16, 564 pub last_ip: u64, 565 pub last_dp: u64, 566 pub xmm: [[u8; 16usize]; 16usize], 567 pub mxcsr: u32, 568 } 569 570 /// State of a VCPU's debug registers. 571 #[repr(C)] 572 #[derive(Debug, Default, Copy, Clone)] 573 pub struct DebugRegs { 574 pub db: [u64; 4usize], 575 pub dr6: u64, 576 pub dr7: u64, 577 } 578 579 /// State of one VCPU register. Currently used for MSRs and XCRs. 580 #[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)] 581 pub struct Register { 582 pub id: u32, 583 pub value: u64, 584 } 585