1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! x86 architecture support.
6
7 #![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
8
9 mod fdt;
10
// setup_data entry types from the Linux x86 boot protocol.
// SETUP_DTB carries a devicetree blob; SETUP_RNG_SEED carries entropy for the guest RNG.
const SETUP_DTB: u32 = 2;
const SETUP_RNG_SEED: u32 = 9;
13
14 #[allow(dead_code)]
15 #[allow(non_upper_case_globals)]
16 #[allow(non_camel_case_types)]
17 #[allow(non_snake_case)]
18 pub mod bootparam;
19
20 #[allow(dead_code)]
21 #[allow(non_upper_case_globals)]
22 mod msr_index;
23
24 #[allow(dead_code)]
25 #[allow(non_upper_case_globals)]
26 #[allow(non_camel_case_types)]
27 #[allow(clippy::all)]
28 mod mpspec;
29
30 #[cfg(unix)]
31 pub mod msr;
32
33 pub mod acpi;
34 mod bzimage;
35 pub mod cpuid;
36 mod gdt;
37 pub mod interrupts;
38 pub mod mptable;
39 pub mod regs;
40 pub mod smbios;
41
42 use std::arch::x86_64::CpuidResult;
43 use std::collections::BTreeMap;
44 use std::ffi::CStr;
45 use std::ffi::CString;
46 use std::fs::File;
47 use std::io;
48 use std::io::Seek;
49 use std::mem;
50 use std::path::PathBuf;
51 use std::sync::mpsc;
52 use std::sync::Arc;
53
54 use acpi_tables::aml;
55 use acpi_tables::aml::Aml;
56 use acpi_tables::sdt::SDT;
57 use anyhow::Context;
58 use arch::get_serial_cmdline;
59 use arch::GetSerialCmdlineError;
60 use arch::MsrAction;
61 use arch::MsrConfig;
62 use arch::MsrFilter;
63 use arch::MsrRWType;
64 use arch::MsrValueFrom;
65 use arch::RunnableLinuxVm;
66 use arch::VmComponents;
67 use arch::VmImage;
68 #[cfg(feature = "seccomp_trace")]
69 use base::debug;
70 use base::warn;
71 #[cfg(unix)]
72 use base::AsRawDescriptors;
73 use base::Event;
74 use base::SendTube;
75 use base::Tube;
76 use base::TubeError;
77 use chrono::Utc;
78 pub use cpuid::adjust_cpuid;
79 pub use cpuid::CpuIdContext;
80 use devices::BusDevice;
81 use devices::BusDeviceObj;
82 use devices::BusResumeDevice;
83 use devices::Debugcon;
84 use devices::IrqChip;
85 use devices::IrqChipX86_64;
86 use devices::IrqEventSource;
87 use devices::PciAddress;
88 use devices::PciConfigIo;
89 use devices::PciConfigMmio;
90 use devices::PciDevice;
91 use devices::PciRoot;
92 use devices::PciRootCommand;
93 use devices::PciVirtualConfigMmio;
94 use devices::Pflash;
95 #[cfg(unix)]
96 use devices::ProxyDevice;
97 use devices::Serial;
98 use devices::SerialHardware;
99 use devices::SerialParameters;
100 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
101 use gdbstub_arch::x86::reg::id::X86_64CoreRegId;
102 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
103 use gdbstub_arch::x86::reg::X86SegmentRegs;
104 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
105 use gdbstub_arch::x86::reg::X86_64CoreRegs;
106 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
107 use gdbstub_arch::x86::reg::X87FpuInternalRegs;
108 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
109 use hypervisor::x86_64::Regs;
110 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
111 use hypervisor::x86_64::Sregs;
112 use hypervisor::CpuConfigX86_64;
113 use hypervisor::Hypervisor;
114 use hypervisor::HypervisorX86_64;
115 use hypervisor::ProtectionType;
116 use hypervisor::VcpuInitX86_64;
117 use hypervisor::VcpuX86_64;
118 use hypervisor::Vm;
119 use hypervisor::VmCap;
120 use hypervisor::VmX86_64;
121 #[cfg(feature = "seccomp_trace")]
122 use jail::read_jail_addr;
123 #[cfg(windows)]
124 use jail::FakeMinijailStub as Minijail;
125 #[cfg(unix)]
126 use minijail::Minijail;
127 use once_cell::sync::OnceCell;
128 use rand::rngs::OsRng;
129 use rand::RngCore;
130 use remain::sorted;
131 use resources::AddressRange;
132 use resources::SystemAllocator;
133 use resources::SystemAllocatorConfig;
134 use sync::Mutex;
135 use thiserror::Error;
136 use vm_control::BatControl;
137 use vm_control::BatteryType;
138 use vm_memory::GuestAddress;
139 use vm_memory::GuestMemory;
140 use vm_memory::GuestMemoryError;
141 use vm_memory::MemoryRegionOptions;
142 use zerocopy::AsBytes;
143 use zerocopy::FromBytes;
144
145 use crate::bootparam::boot_params;
146 use crate::cpuid::EDX_HYBRID_CPU_SHIFT;
147 use crate::msr_index::*;
148
149 #[sorted]
150 #[derive(Error, Debug)]
151 pub enum Error {
152 #[error("error allocating a single gpe")]
153 AllocateGpe,
154 #[error("error allocating IO resource: {0}")]
155 AllocateIOResouce(resources::Error),
156 #[error("error allocating a single irq")]
157 AllocateIrq,
158 #[error("unable to clone an Event: {0}")]
159 CloneEvent(base::Error),
160 #[error("failed to clone IRQ chip: {0}")]
161 CloneIrqChip(base::Error),
162 #[cfg(unix)]
163 #[error("failed to clone jail: {0}")]
164 CloneJail(minijail::Error),
165 #[error("unable to clone a Tube: {0}")]
166 CloneTube(TubeError),
167 #[error("the given kernel command line was invalid: {0}")]
168 Cmdline(kernel_cmdline::Error),
169 #[error("failed to configure hotplugged pci device: {0}")]
170 ConfigurePciDevice(arch::DeviceRegistrationError),
171 #[error("failed to configure segment registers: {0}")]
172 ConfigureSegments(regs::Error),
173 #[error("error configuring the system")]
174 ConfigureSystem,
175 #[error("unable to create ACPI tables")]
176 CreateAcpi,
177 #[error("unable to create battery devices: {0}")]
178 CreateBatDevices(arch::DeviceRegistrationError),
179 #[error("could not create debugcon device: {0}")]
180 CreateDebugconDevice(devices::SerialError),
181 #[error("unable to make an Event: {0}")]
182 CreateEvent(base::Error),
183 #[error("failed to create fdt: {0}")]
184 CreateFdt(cros_fdt::Error),
185 #[error("failed to create IOAPIC device: {0}")]
186 CreateIoapicDevice(base::Error),
187 #[error("failed to create a PCI root hub: {0}")]
188 CreatePciRoot(arch::DeviceRegistrationError),
189 #[error("unable to create PIT: {0}")]
190 CreatePit(base::Error),
191 #[error("unable to make PIT device: {0}")]
192 CreatePitDevice(devices::PitError),
193 #[cfg(unix)]
194 #[error("unable to create proxy device: {0}")]
195 CreateProxyDevice(devices::ProxyError),
196 #[error("unable to create serial devices: {0}")]
197 CreateSerialDevices(arch::DeviceRegistrationError),
198 #[error("failed to create socket: {0}")]
199 CreateSocket(io::Error),
200 #[error("failed to create VCPU: {0}")]
201 CreateVcpu(base::Error),
202 #[error("failed to create Virtio MMIO bus: {0}")]
203 CreateVirtioMmioBus(arch::DeviceRegistrationError),
204 #[error("invalid e820 setup params")]
205 E820Configuration,
206 #[cfg(feature = "direct")]
207 #[error("failed to enable ACPI event forwarding: {0}")]
208 EnableAcpiEvent(devices::DirectIrqError),
209 #[error("failed to enable singlestep execution: {0}")]
210 EnableSinglestep(base::Error),
211 #[error("failed to enable split irqchip: {0}")]
212 EnableSplitIrqchip(base::Error),
213 #[error("failed to get serial cmdline: {0}")]
214 GetSerialCmdline(GetSerialCmdlineError),
215 #[error("failed to insert device onto bus: {0}")]
216 InsertBus(devices::BusError),
217 #[error("the kernel extends past the end of RAM")]
218 InvalidCpuConfig,
219 #[error("invalid CPU config parameters")]
220 KernelOffsetPastEnd,
221 #[error("error loading bios: {0}")]
222 LoadBios(io::Error),
223 #[error("error loading kernel bzImage: {0}")]
224 LoadBzImage(bzimage::Error),
225 #[error("error loading command line: {0}")]
226 LoadCmdline(kernel_loader::Error),
227 #[error("error loading initrd: {0}")]
228 LoadInitrd(arch::LoadImageError),
229 #[error("error loading Kernel: {0}")]
230 LoadKernel(kernel_loader::Error),
231 #[error("error loading pflash: {0}")]
232 LoadPflash(io::Error),
233 #[error("error translating address: Page not present")]
234 PageNotPresent,
235 #[error("error reading guest memory {0}")]
236 ReadingGuestMemory(vm_memory::GuestMemoryError),
237 #[error("single register read not supported on x86_64")]
238 ReadRegIsUnsupported,
239 #[error("error reading CPU registers {0}")]
240 ReadRegs(base::Error),
241 #[error("error registering an IrqFd: {0}")]
242 RegisterIrqfd(base::Error),
243 #[error("error registering virtual socket device: {0}")]
244 RegisterVsock(arch::DeviceRegistrationError),
245 #[error("error reserved pcie config mmio")]
246 ReservePcieCfgMmio(resources::Error),
247 #[error("failed to set a hardware breakpoint: {0}")]
248 SetHwBreakpoint(base::Error),
249 #[error("failed to set interrupts: {0}")]
250 SetLint(interrupts::Error),
251 #[error("failed to set tss addr: {0}")]
252 SetTssAddr(base::Error),
253 #[error("failed to set up cmos: {0}")]
254 SetupCmos(anyhow::Error),
255 #[error("failed to set up cpuid: {0}")]
256 SetupCpuid(cpuid::Error),
257 #[error("setup data too large")]
258 SetupDataTooLarge,
259 #[error("failed to set up FPU: {0}")]
260 SetupFpu(base::Error),
261 #[error("failed to set up guest memory: {0}")]
262 SetupGuestMemory(GuestMemoryError),
263 #[error("failed to set up mptable: {0}")]
264 SetupMptable(mptable::Error),
265 #[error("failed to set up MSRs: {0}")]
266 SetupMsrs(base::Error),
267 #[error("failed to set up page tables: {0}")]
268 SetupPageTables(regs::Error),
269 #[error("failed to set up pflash: {0}")]
270 SetupPflash(anyhow::Error),
271 #[error("failed to set up registers: {0}")]
272 SetupRegs(regs::Error),
273 #[error("failed to set up SMBIOS: {0}")]
274 SetupSmbios(smbios::Error),
275 #[error("failed to set up sregs: {0}")]
276 SetupSregs(base::Error),
277 #[error("failed to translate virtual address")]
278 TranslatingVirtAddr,
279 #[error("protected VMs not supported on x86_64")]
280 UnsupportedProtectionType,
281 #[error("single register write not supported on x86_64")]
282 WriteRegIsUnsupported,
283 #[error("error writing CPU registers {0}")]
284 WriteRegs(base::Error),
285 #[error("error writing guest memory {0}")]
286 WritingGuestMemory(GuestMemoryError),
287 #[error("error writing setup_data: {0}")]
288 WritingSetupData(GuestMemoryError),
289 #[error("the zero page extends past the end of guest_mem")]
290 ZeroPagePastRamEnd,
291 #[error("error writing the zero page of guest memory")]
292 ZeroPageSetup,
293 }
294
/// Specialized `Result` type for x86_64 platform setup operations.
pub type Result<T> = std::result::Result<T, Error>;
296
/// Marker type implementing `arch::LinuxArch` for x86/x86_64 guests.
pub struct X8664arch;
298
// Like `bootparam::setup_data` without the incomplete array field at the end, which allows us to
// safely implement Copy, Clone
#[repr(C)]
#[derive(Copy, Clone, Default, FromBytes, AsBytes)]
struct setup_data_hdr {
    // Guest-physical address of the next setup_data entry, or 0 to terminate the list.
    pub next: u64,
    // One of the SETUP_* type constants (e.g. SETUP_DTB, SETUP_RNG_SEED).
    pub type_: u32,
    // Length in bytes of the payload that immediately follows this header.
    pub len: u32,
}
308
/// Type tag of a `SetupData` entry, matching the Linux boot protocol values.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SetupDataType {
    /// Devicetree blob.
    Dtb = SETUP_DTB,
    /// Random seed for the guest's RNG.
    RngSeed = SETUP_RNG_SEED,
}
315
/// A single entry to be inserted in the bootparam `setup_data` linked list.
pub struct SetupData {
    /// Payload bytes written immediately after the entry header.
    pub data: Vec<u8>,
    /// Entry type, written into the header's `type_` field.
    pub type_: SetupDataType,
}
321
// e820 memory map entry types (values defined by the Linux boot protocol).
enum E820Type {
    // Usable RAM.
    Ram = 0x01,
    // Reserved region, not available to the guest OS.
    Reserved = 0x2,
}
326
const MB: u64 = 1 << 20;
const GB: u64 = 1 << 30;

/// Initial stack pointer value for the boot vCPU.
pub const BOOT_STACK_POINTER: u64 = 0x8000;
// NOTE(review): with the "direct" feature RAM starts at 0x1000 instead of 0 —
// presumably to keep the first page free for host mappings; confirm before relying on it.
const START_OF_RAM_32BITS: u64 = if cfg!(feature = "direct") { 0x1000 } else { 0 };
const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32;
// Linux (with 4-level paging) has a physical memory limit of 46 bits (64 TiB).
const HIGH_MMIO_MAX_END: u64 = (1u64 << 46) - 1;
/// Offset of the 64-bit entry point within a loaded kernel image.
pub const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
/// Guest-physical address where the boot_params "zero page" is written.
pub const ZERO_PAGE_OFFSET: u64 = 0x7000;
const TSS_ADDR: u64 = 0xfffb_d000;

/// Guest-physical address at which the kernel is loaded.
pub const KERNEL_START_OFFSET: u64 = 0x20_0000;
const CMDLINE_OFFSET: u64 = 0x2_0000;
const CMDLINE_MAX_SIZE: u64 = 0x800; // including terminating zero
// setup_data entries live in the gap between the command line and the RSDP window.
const SETUP_DATA_START: u64 = CMDLINE_OFFSET + CMDLINE_MAX_SIZE;
const SETUP_DATA_END: u64 = ACPI_HI_RSDP_WINDOW_BASE;
const X86_64_SERIAL_1_3_IRQ: u32 = 4;
const X86_64_SERIAL_2_4_IRQ: u32 = 3;
// X86_64_SCI_IRQ is used to fill the ACPI FACP table.
// The sci_irq number is better to be a legacy
// IRQ number which is less than 16(actually most of the
// platforms have fixed IRQ number 9). So we can
// reserve the IRQ number 5 for SCI and let the
// the other devices starts from next.
pub const X86_64_SCI_IRQ: u32 = 5;
// The CMOS RTC uses IRQ 8; start allocating IRQs at 9.
pub const X86_64_IRQ_BASE: u32 = 9;
const ACPI_HI_RSDP_WINDOW_BASE: u64 = 0x000E_0000;
356
/// CPU vendor, as determined via CPUID.
#[derive(Debug, PartialEq, Eq)]
pub enum CpuManufacturer {
    Intel,
    Amd,
    Unknown,
}
363
/// Returns the manufacturer of the host CPU, as reported by CPUID.
pub fn get_cpu_manufacturer() -> CpuManufacturer {
    cpuid::cpu_manufacturer()
}
367
// Memory layout below 4G
struct LowMemoryLayout {
    // the pci mmio range below 4G
    pci_mmio: AddressRange,
    // the pcie cfg mmio range
    pcie_cfg_mmio: AddressRange,
}

// Computed once by init_low_memory_layout() and read-only afterwards.
static LOW_MEMORY_LAYOUT: OnceCell<LowMemoryLayout> = OnceCell::new();
377
init_low_memory_layout(pcie_ecam: Option<AddressRange>, pci_low_start: Option<u64>)378 pub fn init_low_memory_layout(pcie_ecam: Option<AddressRange>, pci_low_start: Option<u64>) {
379 LOW_MEMORY_LAYOUT.get_or_init(|| {
380 // Make sure it align to 256MB for MTRR convenient
381 const MEM_32BIT_GAP_SIZE: u64 = if cfg!(feature = "direct") {
382 // Allow space for identity mapping coreboot memory regions on the host
383 // which is found at around 7a00_0000 (little bit before 2GB)
384 //
385 // TODO(b/188011323): stop hardcoding sizes and addresses here and instead
386 // determine the memory map from how the VM has been configured via the
387 // command line.
388 2560 * MB
389 } else {
390 768 * MB
391 };
392 // Reserved memory for nand_bios/LAPIC/IOAPIC/HPET/.....
393 const RESERVED_MEM_SIZE: u64 = 0x800_0000;
394 const PCI_MMIO_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
395 // Reserve 64MB for pcie enhanced configuration
396 const DEFAULT_PCIE_CFG_MMIO_SIZE: u64 = 0x400_0000;
397 const DEFAULT_PCIE_CFG_MMIO_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
398 const DEFAULT_PCIE_CFG_MMIO_START: u64 =
399 DEFAULT_PCIE_CFG_MMIO_END - DEFAULT_PCIE_CFG_MMIO_SIZE + 1;
400 const DEFAULT_PCIE_CFG_MMIO: AddressRange = AddressRange {
401 start: DEFAULT_PCIE_CFG_MMIO_START,
402 end: DEFAULT_PCIE_CFG_MMIO_END,
403 };
404
405 let pcie_cfg_mmio = pcie_ecam.unwrap_or(DEFAULT_PCIE_CFG_MMIO);
406
407 let pci_mmio = if let Some(pci_low) = pci_low_start {
408 AddressRange {
409 start: pci_low,
410 end: PCI_MMIO_END,
411 }
412 } else {
413 AddressRange {
414 start: pcie_cfg_mmio
415 .start
416 .min(FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE),
417 end: PCI_MMIO_END,
418 }
419 };
420
421 LowMemoryLayout {
422 pci_mmio,
423 pcie_cfg_mmio,
424 }
425 });
426 }
427
/// Returns the low (below 4G) PCI MMIO range.
///
/// Panics if `init_low_memory_layout()` has not been called yet.
pub fn read_pci_mmio_before_32bit() -> AddressRange {
    LOW_MEMORY_LAYOUT.get().unwrap().pci_mmio
}
/// Returns the PCIe enhanced-configuration (ECAM) MMIO range.
///
/// Panics if `init_low_memory_layout()` has not been called yet.
pub fn read_pcie_cfg_mmio() -> AddressRange {
    LOW_MEMORY_LAYOUT.get().unwrap().pcie_cfg_mmio
}
434
/// The x86 reset vector for i386+ and x86_64 puts the processor into an "unreal mode" where it
/// can access the last 1 MB of the 32-bit address space in 16-bit mode, and starts the instruction
/// pointer at the effective physical address 0xFFFF_FFF0.
fn bios_start(bios_size: u64) -> GuestAddress {
    // Place the BIOS so its final byte ends exactly at 4G, covering the reset vector.
    GuestAddress(FIRST_ADDR_PAST_32BITS - bios_size)
}
441
/// Populates the boot_params "zero page" and writes it to guest memory.
///
/// Fills in the kernel boot header (loader type, command line pointer/size,
/// optional setup_data list head, optional initrd location) and builds the
/// e820 memory map, then copies the finished `params` to `ZERO_PAGE_OFFSET`.
///
/// # Arguments
///
/// * `guest_mem` - guest memory to write the zero page into.
/// * `kernel_addr` - guest address where the kernel was loaded; start of low RAM in e820.
/// * `cmdline_addr` / `cmdline_size` - location and length of the kernel command line.
/// * `setup_data` - guest address of the first setup_data entry, if any.
/// * `initrd` - guest address and size of the initrd, if any.
/// * `params` - partially-filled boot_params to complete and write out.
fn configure_system(
    guest_mem: &GuestMemory,
    kernel_addr: GuestAddress,
    cmdline_addr: GuestAddress,
    cmdline_size: usize,
    setup_data: Option<GuestAddress>,
    initrd: Option<(GuestAddress, usize)>,
    mut params: boot_params,
) -> Result<()> {
    const EBDA_START: u64 = 0x0009_fc00;
    const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
    const KERNEL_HDR_MAGIC: u32 = 0x5372_6448;
    const KERNEL_LOADER_OTHER: u8 = 0xff;
    const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x100_0000; // Must be non-zero.

    params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
    params.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
    params.hdr.header = KERNEL_HDR_MAGIC;
    // The command line pointer is split: low 32 bits in the legacy field, high
    // 32 bits in ext_cmd_line_ptr.
    params.hdr.cmd_line_ptr = cmdline_addr.offset() as u32;
    params.ext_cmd_line_ptr = (cmdline_addr.offset() >> 32) as u32;
    params.hdr.cmdline_size = cmdline_size as u32;
    params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
    if let Some(setup_data) = setup_data {
        params.hdr.setup_data = setup_data.offset();
    }
    if let Some((initrd_addr, initrd_size)) = initrd {
        params.hdr.ramdisk_image = initrd_addr.offset() as u32;
        params.hdr.ramdisk_size = initrd_size as u32;
    }

    // Low RAM up to (but not including) the EBDA.
    add_e820_entry(
        &mut params,
        AddressRange {
            start: START_OF_RAM_32BITS,
            end: EBDA_START - 1,
        },
        E820Type::Ram,
    )?;

    // GuestMemory::end_addr() returns the first address past the end, so subtract 1 to get the
    // inclusive end.
    let guest_mem_end = guest_mem.end_addr().offset() - 1;
    // RAM from the kernel up to the PCI MMIO hole (or end of memory, whichever is lower).
    let ram_below_4g = AddressRange {
        start: kernel_addr.offset(),
        end: guest_mem_end.min(read_pci_mmio_before_32bit().start - 1),
    };
    let ram_above_4g = AddressRange {
        start: FIRST_ADDR_PAST_32BITS,
        end: guest_mem_end,
    };
    add_e820_entry(&mut params, ram_below_4g, E820Type::Ram)?;
    if !ram_above_4g.is_empty() {
        add_e820_entry(&mut params, ram_above_4g, E820Type::Ram)?
    }

    // Mark the PCIe config window and the virtual config window as reserved.
    let pcie_cfg_mmio_range = read_pcie_cfg_mmio();
    add_e820_entry(&mut params, pcie_cfg_mmio_range, E820Type::Reserved)?;

    add_e820_entry(
        &mut params,
        X8664arch::get_pcie_vcfg_mmio_range(guest_mem, &pcie_cfg_mmio_range),
        E820Type::Reserved,
    )?;

    let zero_page_addr = GuestAddress(ZERO_PAGE_OFFSET);
    if !guest_mem.is_valid_range(zero_page_addr, mem::size_of::<boot_params>() as u64) {
        return Err(Error::ZeroPagePastRamEnd);
    }

    guest_mem
        .write_obj_at_addr(params, zero_page_addr)
        .map_err(|_| Error::ZeroPageSetup)?;

    Ok(())
}
517
/// Write setup_data entries in guest memory and link them together with the `next` field.
///
/// Returns the guest address of the first entry in the setup_data list, if any.
///
/// Each entry is written as a `setup_data_hdr` immediately followed by its payload,
/// at 64-bit aligned addresses within [`setup_data_start`, `setup_data_end`).
fn write_setup_data(
    guest_mem: &GuestMemory,
    setup_data_start: GuestAddress,
    setup_data_end: GuestAddress,
    setup_data: &[SetupData],
) -> Result<Option<GuestAddress>> {
    let mut setup_data_list_head = None;

    // Place the first setup_data at the first 64-bit aligned offset following setup_data_start.
    let mut setup_data_addr = setup_data_start.align(8).ok_or(Error::SetupDataTooLarge)?;

    // Peekable so we know, while writing each entry, whether another follows
    // (the last entry must have next == 0 to terminate the list).
    let mut entry_iter = setup_data.iter().peekable();
    while let Some(entry) = entry_iter.next() {
        if setup_data_list_head.is_none() {
            setup_data_list_head = Some(setup_data_addr);
        }

        // Ensure the entry (header plus data) fits into guest memory.
        let entry_size = (mem::size_of::<setup_data_hdr>() + entry.data.len()) as u64;
        let entry_end = setup_data_addr
            .checked_add(entry_size)
            .ok_or(Error::SetupDataTooLarge)?;

        if entry_end >= setup_data_end {
            return Err(Error::SetupDataTooLarge);
        }

        let next_setup_data_addr = if entry_iter.peek().is_some() {
            // Place the next setup_data at a 64-bit aligned address.
            setup_data_addr
                .checked_add(entry_size)
                .and_then(|addr| addr.align(8))
                .ok_or(Error::SetupDataTooLarge)?
        } else {
            // This is the final entry. Terminate the list with next == 0.
            GuestAddress(0)
        };

        let hdr = setup_data_hdr {
            next: next_setup_data_addr.offset(),
            type_: entry.type_ as u32,
            len: entry
                .data
                .len()
                .try_into()
                .map_err(|_| Error::SetupDataTooLarge)?,
        };

        // Write the header, then the payload directly after it.
        guest_mem
            .write_obj_at_addr(hdr, setup_data_addr)
            .map_err(Error::WritingSetupData)?;
        guest_mem
            .write_all_at_addr(
                &entry.data,
                setup_data_addr.unchecked_add(mem::size_of::<setup_data_hdr>() as u64),
            )
            .map_err(Error::WritingSetupData)?;

        setup_data_addr = next_setup_data_addr;
    }

    Ok(setup_data_list_head)
}
584
585 /// Generate a SETUP_RNG_SEED SetupData with random seed data.
setup_data_rng_seed() -> SetupData586 fn setup_data_rng_seed() -> SetupData {
587 let mut data = vec![0u8; 256];
588 OsRng.fill_bytes(&mut data);
589 SetupData {
590 data,
591 type_: SetupDataType::RngSeed,
592 }
593 }
594
595 /// Add an e820 region to the e820 map.
596 /// Returns Ok(()) if successful, or an error if there is no space left in the map.
add_e820_entry(params: &mut boot_params, range: AddressRange, mem_type: E820Type) -> Result<()>597 fn add_e820_entry(params: &mut boot_params, range: AddressRange, mem_type: E820Type) -> Result<()> {
598 if params.e820_entries >= params.e820_table.len() as u8 {
599 return Err(Error::E820Configuration);
600 }
601
602 let size = range.len().ok_or(Error::E820Configuration)?;
603
604 params.e820_table[params.e820_entries as usize].addr = range.start;
605 params.e820_table[params.e820_entries as usize].size = size;
606 params.e820_table[params.e820_entries as usize].type_ = mem_type as u32;
607 params.e820_entries += 1;
608
609 Ok(())
610 }
611
612 /// Returns a Vec of the valid memory addresses.
613 /// These should be used to configure the GuestMemory structure for the platform.
614 /// For x86_64 all addresses are valid from the start of the kernel except a
615 /// carve out at the end of 32bit address space.
arch_memory_regions( size: u64, bios_size: Option<u64>, ) -> Vec<(GuestAddress, u64, MemoryRegionOptions)>616 pub fn arch_memory_regions(
617 size: u64,
618 bios_size: Option<u64>,
619 ) -> Vec<(GuestAddress, u64, MemoryRegionOptions)> {
620 let mem_start = START_OF_RAM_32BITS;
621 let mem_end = GuestAddress(size + mem_start);
622
623 let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS);
624 let end_32bit_gap_start = GuestAddress(read_pci_mmio_before_32bit().start);
625
626 let mut regions = Vec::new();
627 if mem_end <= end_32bit_gap_start {
628 regions.push((GuestAddress(mem_start), size, Default::default()));
629 if let Some(bios_size) = bios_size {
630 regions.push((bios_start(bios_size), bios_size, Default::default()));
631 }
632 } else {
633 regions.push((
634 GuestAddress(mem_start),
635 end_32bit_gap_start.offset() - mem_start,
636 Default::default(),
637 ));
638 if let Some(bios_size) = bios_size {
639 regions.push((bios_start(bios_size), bios_size, Default::default()));
640 }
641 regions.push((
642 first_addr_past_32bits,
643 mem_end.offset_from(end_32bit_gap_start),
644 Default::default(),
645 ));
646 }
647
648 regions
649 }
650
651 impl arch::LinuxArch for X8664arch {
652 type Error = Error;
653
guest_memory_layout( components: &VmComponents, _hypervisor: &impl Hypervisor, ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>654 fn guest_memory_layout(
655 components: &VmComponents,
656 _hypervisor: &impl Hypervisor,
657 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
658 init_low_memory_layout(components.pcie_ecam, components.pci_low_start);
659
660 let bios_size = match &components.vm_image {
661 VmImage::Bios(bios_file) => Some(bios_file.metadata().map_err(Error::LoadBios)?.len()),
662 VmImage::Kernel(_) => None,
663 };
664
665 Ok(arch_memory_regions(components.memory_size, bios_size))
666 }
667
    /// Describes the I/O-port pool, low/high MMIO pools, and first allocatable
    /// IRQ for the system resource allocator.
    fn get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig {
        SystemAllocatorConfig {
            // Allocatable I/O ports start at 0xc000; lower ports are used by
            // fixed platform devices (e.g. PCI config at 0xcf8, serial).
            io: Some(AddressRange {
                start: 0xc000,
                end: 0xffff,
            }),
            low_mmio: read_pci_mmio_before_32bit(),
            high_mmio: Self::get_high_mmio_range(vm),
            platform_mmio: None,
            // IRQs 0-8 are reserved for legacy devices (see X86_64_IRQ_BASE).
            first_irq: X86_64_IRQ_BASE,
        }
    }
680
build_vm<V, Vcpu>( mut components: VmComponents, vm_evt_wrtube: &SendTube, system_allocator: &mut SystemAllocator, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, battery: (Option<BatteryType>, Option<Minijail>), mut vm: V, ramoops_region: Option<arch::pstore::RamoopsRegion>, devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>, irq_chip: &mut dyn IrqChipX86_64, vcpu_ids: &mut Vec<usize>, dump_device_tree_blob: Option<PathBuf>, debugcon_jail: Option<Minijail>, pflash_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error> where V: VmX86_64, Vcpu: VcpuX86_64,681 fn build_vm<V, Vcpu>(
682 mut components: VmComponents,
683 vm_evt_wrtube: &SendTube,
684 system_allocator: &mut SystemAllocator,
685 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
686 serial_jail: Option<Minijail>,
687 battery: (Option<BatteryType>, Option<Minijail>),
688 mut vm: V,
689 ramoops_region: Option<arch::pstore::RamoopsRegion>,
690 devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
691 irq_chip: &mut dyn IrqChipX86_64,
692 vcpu_ids: &mut Vec<usize>,
693 dump_device_tree_blob: Option<PathBuf>,
694 debugcon_jail: Option<Minijail>,
695 pflash_jail: Option<Minijail>,
696 #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
697 ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
698 where
699 V: VmX86_64,
700 Vcpu: VcpuX86_64,
701 {
702 if components.hv_cfg.protection_type != ProtectionType::Unprotected {
703 return Err(Error::UnsupportedProtectionType);
704 }
705
706 let mem = vm.get_memory().clone();
707
708 let vcpu_count = components.vcpu_count;
709
710 let tss_addr = GuestAddress(TSS_ADDR);
711 vm.set_tss_addr(tss_addr).map_err(Error::SetTssAddr)?;
712
713 // Use IRQ info in ACPI if provided by the user.
714 let mut noirq = true;
715 let mut mptable = true;
716 let mut sci_irq = X86_64_SCI_IRQ;
717
718 // punch pcie config mmio from pci low mmio, so that it couldn't be
719 // allocated to any device.
720 let pcie_cfg_mmio_range = read_pcie_cfg_mmio();
721 system_allocator
722 .reserve_mmio(pcie_cfg_mmio_range)
723 .map_err(Error::ReservePcieCfgMmio)?;
724
725 for sdt in components.acpi_sdts.iter() {
726 if sdt.is_signature(b"DSDT") || sdt.is_signature(b"APIC") {
727 noirq = false;
728 } else if sdt.is_signature(b"FACP") {
729 mptable = false;
730 let sci_irq_fadt: u16 = sdt.read(acpi::FADT_FIELD_SCI_INTERRUPT);
731 sci_irq = sci_irq_fadt.into();
732 if !system_allocator.reserve_irq(sci_irq) {
733 warn!("sci irq {} already reserved.", sci_irq);
734 }
735 }
736 }
737
738 let pcie_vcfg_range = Self::get_pcie_vcfg_mmio_range(&mem, &pcie_cfg_mmio_range);
739 let mmio_bus = Arc::new(devices::Bus::new());
740 let io_bus = Arc::new(devices::Bus::new());
741
742 let (pci_devices, devs): (Vec<_>, Vec<_>) = devs
743 .into_iter()
744 .partition(|(dev, _)| dev.as_pci_device().is_some());
745
746 let pci_devices = pci_devices
747 .into_iter()
748 .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
749 .collect();
750
751 let (pci, pci_irqs, mut pid_debug_label_map, amls) = arch::generate_pci_root(
752 pci_devices,
753 irq_chip.as_irq_chip_mut(),
754 mmio_bus.clone(),
755 io_bus.clone(),
756 system_allocator,
757 &mut vm,
758 4, // Share the four pin interrupts (INTx#)
759 Some(pcie_vcfg_range.start),
760 #[cfg(feature = "swap")]
761 swap_controller,
762 )
763 .map_err(Error::CreatePciRoot)?;
764
765 let pci = Arc::new(Mutex::new(pci));
766 pci.lock().enable_pcie_cfg_mmio(pcie_cfg_mmio_range.start);
767 let pci_cfg = PciConfigIo::new(
768 pci.clone(),
769 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
770 );
771 let pci_bus = Arc::new(Mutex::new(pci_cfg));
772 io_bus.insert(pci_bus, 0xcf8, 0x8).unwrap();
773
774 let pcie_cfg_mmio = Arc::new(Mutex::new(PciConfigMmio::new(pci.clone(), 12)));
775 let pcie_cfg_mmio_len = pcie_cfg_mmio_range.len().unwrap();
776 mmio_bus
777 .insert(pcie_cfg_mmio, pcie_cfg_mmio_range.start, pcie_cfg_mmio_len)
778 .unwrap();
779
780 let pcie_vcfg_mmio = Arc::new(Mutex::new(PciVirtualConfigMmio::new(pci.clone(), 13)));
781 mmio_bus
782 .insert(
783 pcie_vcfg_mmio,
784 pcie_vcfg_range.start,
785 pcie_vcfg_range.len().unwrap(),
786 )
787 .unwrap();
788
789 let (virtio_mmio_devices, _others): (Vec<_>, Vec<_>) = devs
790 .into_iter()
791 .partition(|(dev, _)| dev.as_virtio_mmio_device().is_some());
792
793 let virtio_mmio_devices = virtio_mmio_devices
794 .into_iter()
795 .map(|(dev, jail_orig)| (*(dev.into_virtio_mmio_device().unwrap()), jail_orig))
796 .collect();
797 let (mut virtio_mmio_pid, sdts) = arch::generate_virtio_mmio_bus(
798 virtio_mmio_devices,
799 irq_chip.as_irq_chip_mut(),
800 &mmio_bus,
801 system_allocator,
802 &mut vm,
803 components.acpi_sdts,
804 #[cfg(feature = "swap")]
805 swap_controller,
806 )
807 .map_err(Error::CreateVirtioMmioBus)?;
808 components.acpi_sdts = sdts;
809 pid_debug_label_map.append(&mut virtio_mmio_pid);
810
811 // Event used to notify crosvm that guest OS is trying to suspend.
812 let suspend_evt = Event::new().map_err(Error::CreateEvent)?;
813
814 if !components.no_i8042 {
815 Self::setup_legacy_i8042_device(
816 &io_bus,
817 irq_chip.pit_uses_speaker_port(),
818 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
819 )?;
820 }
821 let vm_request_tube = if !components.no_rtc {
822 let (host_tube, device_tube) = Tube::pair()
823 .context("create tube")
824 .map_err(Error::SetupCmos)?;
825 Self::setup_legacy_cmos_device(&io_bus, irq_chip, device_tube, components.memory_size)
826 .map_err(Error::SetupCmos)?;
827 Some(host_tube)
828 } else {
829 None
830 };
831 Self::setup_serial_devices(
832 components.hv_cfg.protection_type,
833 irq_chip.as_irq_chip_mut(),
834 &io_bus,
835 serial_parameters,
836 serial_jail,
837 #[cfg(feature = "swap")]
838 swap_controller,
839 )?;
840 Self::setup_debugcon_devices(
841 components.hv_cfg.protection_type,
842 &io_bus,
843 serial_parameters,
844 debugcon_jail,
845 #[cfg(feature = "swap")]
846 swap_controller,
847 )?;
848
849 let bios_size = if let VmImage::Bios(ref bios) = components.vm_image {
850 bios.metadata().map_err(Error::LoadBios)?.len()
851 } else {
852 0
853 };
854 if let Some(pflash_image) = components.pflash_image {
855 Self::setup_pflash(
856 pflash_image,
857 components.pflash_block_size,
858 bios_size,
859 &mmio_bus,
860 pflash_jail,
861 #[cfg(feature = "swap")]
862 swap_controller,
863 )?;
864 }
865
866 // Functions that use/create jails MUST be used before the call to
867 // setup_acpi_devices below, as this move us into a multiprocessing state
868 // from which we can no longer fork.
869
870 let mut resume_notify_devices = Vec::new();
871
872 // each bus occupy 1MB mmio for pcie enhanced configuration
873 let max_bus = (pcie_cfg_mmio_len / 0x100000 - 1) as u8;
874 let (mut acpi_dev_resource, bat_control) = Self::setup_acpi_devices(
875 pci.clone(),
876 &mem,
877 &io_bus,
878 system_allocator,
879 suspend_evt.try_clone().map_err(Error::CloneEvent)?,
880 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
881 components.acpi_sdts,
882 #[cfg(feature = "direct")]
883 &components.direct_gpe,
884 #[cfg(feature = "direct")]
885 &components.direct_fixed_evts,
886 irq_chip.as_irq_chip_mut(),
887 sci_irq,
888 battery,
889 &mmio_bus,
890 max_bus,
891 &mut resume_notify_devices,
892 #[cfg(feature = "swap")]
893 swap_controller,
894 #[cfg(unix)]
895 components.ac_adapter,
896 )?;
897
898 // Create customized SSDT table
899 let sdt = acpi::create_customize_ssdt(pci.clone(), amls);
900 if let Some(sdt) = sdt {
901 acpi_dev_resource.sdts.push(sdt);
902 }
903
904 irq_chip
905 .finalize_devices(system_allocator, &io_bus, &mmio_bus)
906 .map_err(Error::RegisterIrqfd)?;
907
908 // All of these bios generated tables are set manually for the benefit of the kernel boot
909 // flow (since there's no BIOS to set it) and for the BIOS boot flow since crosvm doesn't
910 // have a way to pass the BIOS these configs.
911 // This works right now because the only guest BIOS used with crosvm (u-boot) ignores these
912 // tables and the guest OS picks them up.
913 // If another guest does need a way to pass these tables down to it's BIOS, this approach
914 // should be rethought.
915
916 if mptable {
917 // Note that this puts the mptable at 0x9FC00 in guest physical memory.
918 mptable::setup_mptable(&mem, vcpu_count as u8, &pci_irqs)
919 .map_err(Error::SetupMptable)?;
920 }
921 smbios::setup_smbios(&mem, components.dmi_path, &components.oem_strings)
922 .map_err(Error::SetupSmbios)?;
923
924 let host_cpus = if components.host_cpu_topology {
925 components.vcpu_affinity.clone()
926 } else {
927 None
928 };
929
930 // TODO (tjeznach) Write RSDP to bootconfig before writing to memory
931 acpi::create_acpi_tables(
932 &mem,
933 vcpu_count as u8,
934 sci_irq,
935 0xcf9,
936 6, // RST_CPU|SYS_RST
937 &acpi_dev_resource,
938 host_cpus,
939 vcpu_ids,
940 &pci_irqs,
941 pcie_cfg_mmio_range.start,
942 max_bus,
943 components.force_s2idle,
944 )
945 .ok_or(Error::CreateAcpi)?;
946
947 let mut cmdline = Self::get_base_linux_cmdline();
948
949 if noirq {
950 cmdline.insert_str("acpi=noirq").unwrap();
951 }
952
953 get_serial_cmdline(&mut cmdline, serial_parameters, "io")
954 .map_err(Error::GetSerialCmdline)?;
955
956 for param in components.extra_kernel_params {
957 cmdline.insert_str(¶m).map_err(Error::Cmdline)?;
958 }
959
960 if let Some(ramoops_region) = ramoops_region {
961 arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
962 .map_err(Error::Cmdline)?;
963 }
964
965 let pci_start = read_pci_mmio_before_32bit().start;
966
967 let mut vcpu_init = vec![VcpuInitX86_64::default(); vcpu_count];
968
969 let mut msrs;
970 match components.vm_image {
971 VmImage::Bios(ref mut bios) => {
972 // Allow a bios to hardcode CMDLINE_OFFSET and read the kernel command line from it.
973 kernel_loader::load_cmdline(
974 &mem,
975 GuestAddress(CMDLINE_OFFSET),
976 &CString::new(cmdline).unwrap(),
977 )
978 .map_err(Error::LoadCmdline)?;
979 Self::load_bios(&mem, bios)?;
980 msrs = regs::default_msrs();
981 // The default values for `Regs` and `Sregs` already set up the reset vector.
982 }
983 VmImage::Kernel(ref mut kernel_image) => {
984 let (params, kernel_end, kernel_entry) = Self::load_kernel(&mem, kernel_image)?;
985
986 Self::setup_system_memory(
987 &mem,
988 &CString::new(cmdline).unwrap(),
989 components.initrd_image,
990 components.android_fstab,
991 kernel_end,
992 params,
993 dump_device_tree_blob,
994 )?;
995
996 // Configure the bootstrap VCPU for the Linux/x86 64-bit boot protocol.
997 // <https://www.kernel.org/doc/html/latest/x86/boot.html>
998 vcpu_init[0].regs.rip = kernel_entry.offset();
999 vcpu_init[0].regs.rsp = BOOT_STACK_POINTER;
1000 vcpu_init[0].regs.rsi = ZERO_PAGE_OFFSET;
1001
1002 msrs = regs::long_mode_msrs();
1003 msrs.append(&mut regs::mtrr_msrs(&vm, pci_start));
1004
1005 // Set up long mode and enable paging.
1006 regs::configure_segments_and_sregs(&mem, &mut vcpu_init[0].sregs)
1007 .map_err(Error::ConfigureSegments)?;
1008 regs::setup_page_tables(&mem, &mut vcpu_init[0].sregs)
1009 .map_err(Error::SetupPageTables)?;
1010 }
1011 }
1012
1013 // Initialize MSRs for all VCPUs.
1014 for vcpu in vcpu_init.iter_mut() {
1015 vcpu.msrs = msrs.clone();
1016 }
1017
1018 Ok(RunnableLinuxVm {
1019 vm,
1020 vcpu_count,
1021 vcpus: None,
1022 vcpu_affinity: components.vcpu_affinity,
1023 vcpu_init,
1024 no_smt: components.no_smt,
1025 irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
1026 has_bios: matches!(components.vm_image, VmImage::Bios(_)),
1027 io_bus,
1028 mmio_bus,
1029 pid_debug_label_map,
1030 suspend_evt,
1031 resume_notify_devices,
1032 rt_cpus: components.rt_cpus,
1033 delay_rt: components.delay_rt,
1034 bat_control,
1035 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1036 gdb: components.gdb,
1037 pm: Some(acpi_dev_resource.pm),
1038 root_config: pci,
1039 #[cfg(unix)]
1040 platform_devices: Vec::new(),
1041 hotplug_bus: BTreeMap::new(),
1042 devices_thread: None,
1043 vm_request_tube,
1044 })
1045 }
1046
configure_vcpu<V: Vm>( vm: &V, hypervisor: &dyn HypervisorX86_64, irq_chip: &mut dyn IrqChipX86_64, vcpu: &mut dyn VcpuX86_64, vcpu_init: VcpuInitX86_64, vcpu_id: usize, num_cpus: usize, _has_bios: bool, cpu_config: Option<CpuConfigX86_64>, ) -> Result<()>1047 fn configure_vcpu<V: Vm>(
1048 vm: &V,
1049 hypervisor: &dyn HypervisorX86_64,
1050 irq_chip: &mut dyn IrqChipX86_64,
1051 vcpu: &mut dyn VcpuX86_64,
1052 vcpu_init: VcpuInitX86_64,
1053 vcpu_id: usize,
1054 num_cpus: usize,
1055 _has_bios: bool,
1056 cpu_config: Option<CpuConfigX86_64>,
1057 ) -> Result<()> {
1058 let cpu_config = match cpu_config {
1059 Some(config) => config,
1060 None => return Err(Error::InvalidCpuConfig),
1061 };
1062 if !vm.check_capability(VmCap::EarlyInitCpuid) {
1063 cpuid::setup_cpuid(hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, cpu_config)
1064 .map_err(Error::SetupCpuid)?;
1065 }
1066
1067 vcpu.set_regs(&vcpu_init.regs).map_err(Error::WriteRegs)?;
1068
1069 vcpu.set_sregs(&vcpu_init.sregs)
1070 .map_err(Error::SetupSregs)?;
1071
1072 vcpu.set_fpu(&vcpu_init.fpu).map_err(Error::SetupFpu)?;
1073
1074 let vcpu_supported_var_mtrrs = regs::vcpu_supported_variable_mtrrs(vcpu);
1075 let num_var_mtrrs = regs::count_variable_mtrrs(&vcpu_init.msrs);
1076 let msrs = if num_var_mtrrs > vcpu_supported_var_mtrrs {
1077 warn!(
1078 "Too many variable MTRR entries ({} required, {} supported),
1079 please check pci_start addr, guest with pass through device may be very slow",
1080 num_var_mtrrs, vcpu_supported_var_mtrrs,
1081 );
1082 // Filter out the MTRR entries from the MSR list.
1083 vcpu_init
1084 .msrs
1085 .into_iter()
1086 .filter(|&msr| !regs::is_mtrr_msr(msr.id))
1087 .collect()
1088 } else {
1089 vcpu_init.msrs
1090 };
1091
1092 vcpu.set_msrs(&msrs).map_err(Error::SetupMsrs)?;
1093
1094 interrupts::set_lint(vcpu_id, irq_chip).map_err(Error::SetLint)?;
1095
1096 Ok(())
1097 }
1098
    /// Adds a PCI device to the VM's PCI topology, returning its assigned
    /// PCI address.
    ///
    /// Thin wrapper around [`arch::configure_pci_device`]; the only
    /// architecture-specific behavior here is wrapping failures in
    /// [`Error::ConfigurePciDevice`].
    ///
    /// # Arguments
    ///
    /// * `linux` - the VM to register the device in
    /// * `device` - the PCI device to register
    /// * `minijail` - (unix only) optional sandbox jail for the device process
    /// * `resources` - allocator for the device's IO/MMIO/IRQ resources
    /// * `hp_control_tube` - channel to the PCI root for hotplug commands
    /// * `swap_controller` - (swap feature) controller for vm-swap of the device
    fn register_pci_device<V: VmX86_64, Vcpu: VcpuX86_64>(
        linux: &mut RunnableLinuxVm<V, Vcpu>,
        device: Box<dyn PciDevice>,
        #[cfg(unix)] minijail: Option<Minijail>,
        resources: &mut SystemAllocator,
        hp_control_tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
    ) -> Result<PciAddress> {
        arch::configure_pci_device(
            linux,
            device,
            #[cfg(unix)]
            minijail,
            resources,
            hp_control_tube,
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(Error::ConfigurePciDevice)
    }
1119 }
1120
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
impl<T: VcpuX86_64> arch::GdbOps<T> for X8664arch {
    type Error = Error;

    /// Reads the general-purpose, segment, and FPU/SSE state of `vcpu` into the
    /// GDB x86_64 core register layout.
    fn read_registers(vcpu: &T) -> Result<X86_64CoreRegs> {
        // General registers: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15
        let gregs = vcpu.get_regs().map_err(Error::ReadRegs)?;
        let regs = [
            gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rbp, gregs.rsp,
            gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14, gregs.r15,
        ];

        // GDB exposes 32-bit eflags instead of 64-bit rflags.
        // https://github.com/bminor/binutils-gdb/blob/master/gdb/features/i386/64bit-core.xml
        let eflags = gregs.rflags as u32;
        let rip = gregs.rip;

        // Segment registers: CS, SS, DS, ES, FS, GS
        // Only the selectors are reported; base/limit/flags are dropped.
        let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
        let segments = X86SegmentRegs {
            cs: sregs.cs.selector as u32,
            ss: sregs.ss.selector as u32,
            ds: sregs.ds.selector as u32,
            es: sregs.es.selector as u32,
            fs: sregs.fs.selector as u32,
            gs: sregs.gs.selector as u32,
        };

        // x87 FPU internal state
        // TODO(dverkamp): floating point tag word, instruction pointer, and data pointer
        let fpu = vcpu.get_fpu().map_err(Error::ReadRegs)?;
        let fpu_internal = X87FpuInternalRegs {
            fctrl: u32::from(fpu.fcw),
            fstat: u32::from(fpu.fsw),
            fop: u32::from(fpu.last_opcode),
            ..Default::default()
        };

        let mut regs = X86_64CoreRegs {
            regs,
            eflags,
            rip,
            segments,
            st: Default::default(),
            fpu: fpu_internal,
            xmm: Default::default(),
            mxcsr: fpu.mxcsr,
        };

        // x87 FPU registers: ST0-ST7
        for (dst, src) in regs.st.iter_mut().zip(fpu.fpr.iter()) {
            // `fpr` contains the x87 floating point registers in FXSAVE format.
            // Each element contains an 80-bit floating point value in the low 10 bytes.
            // The upper 6 bytes are reserved and can be ignored.
            dst.copy_from_slice(&src[0..10])
        }

        // SSE registers: XMM0-XMM15
        for (dst, src) in regs.xmm.iter_mut().zip(fpu.xmm.iter()) {
            *dst = u128::from_le_bytes(*src);
        }

        Ok(regs)
    }

    /// Writes GDB-supplied register state back into `vcpu`.
    ///
    /// Fields GDB does not model (upper 32 bits of rflags, segment descriptors
    /// other than selectors, FPU tag/pointer words) are read from the VCPU first
    /// and preserved.
    fn write_registers(vcpu: &T, regs: &X86_64CoreRegs) -> Result<()> {
        // General purpose registers (RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15) + RIP + rflags
        let orig_gregs = vcpu.get_regs().map_err(Error::ReadRegs)?;
        let gregs = Regs {
            rax: regs.regs[0],
            rbx: regs.regs[1],
            rcx: regs.regs[2],
            rdx: regs.regs[3],
            rsi: regs.regs[4],
            rdi: regs.regs[5],
            rbp: regs.regs[6],
            rsp: regs.regs[7],
            r8: regs.regs[8],
            r9: regs.regs[9],
            r10: regs.regs[10],
            r11: regs.regs[11],
            r12: regs.regs[12],
            r13: regs.regs[13],
            r14: regs.regs[14],
            r15: regs.regs[15],
            rip: regs.rip,
            // Update the lower 32 bits of rflags.
            rflags: (orig_gregs.rflags & !(u32::MAX as u64)) | (regs.eflags as u64),
        };
        vcpu.set_regs(&gregs).map_err(Error::WriteRegs)?;

        // Segment registers: CS, SS, DS, ES, FS, GS
        // Since GDB care only selectors, we call get_sregs() first.
        let mut sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
        sregs.cs.selector = regs.segments.cs as u16;
        sregs.ss.selector = regs.segments.ss as u16;
        sregs.ds.selector = regs.segments.ds as u16;
        sregs.es.selector = regs.segments.es as u16;
        sregs.fs.selector = regs.segments.fs as u16;
        sregs.gs.selector = regs.segments.gs as u16;

        vcpu.set_sregs(&sregs).map_err(Error::WriteRegs)?;

        // FPU and SSE registers
        let mut fpu = vcpu.get_fpu().map_err(Error::ReadRegs)?;
        fpu.fcw = regs.fpu.fctrl as u16;
        fpu.fsw = regs.fpu.fstat as u16;
        fpu.last_opcode = regs.fpu.fop as u16;
        // TODO(dverkamp): floating point tag word, instruction pointer, and data pointer

        // x87 FPU registers: ST0-ST7
        // Only the low 10 bytes (the 80-bit value) of each FXSAVE slot are written.
        for (dst, src) in fpu.fpr.iter_mut().zip(regs.st.iter()) {
            dst[0..10].copy_from_slice(src);
        }

        // SSE registers: XMM0-XMM15
        for (dst, src) in fpu.xmm.iter_mut().zip(regs.xmm.iter()) {
            dst.copy_from_slice(&src.to_le_bytes());
        }

        vcpu.set_fpu(&fpu).map_err(Error::WriteRegs)?;

        Ok(())
    }

    /// Single-register reads are not supported; GDB must use `read_registers`.
    #[inline]
    fn read_register(_vcpu: &T, _reg: X86_64CoreRegId) -> Result<Vec<u8>> {
        Err(Error::ReadRegIsUnsupported)
    }

    /// Single-register writes are not supported; GDB must use `write_registers`.
    #[inline]
    fn write_register(_vcpu: &T, _reg: X86_64CoreRegId, _buf: &[u8]) -> Result<()> {
        Err(Error::WriteRegIsUnsupported)
    }

    /// Reads `len` bytes of guest memory starting at virtual address `vaddr`,
    /// translating through the VCPU's current page tables.
    fn read_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        len: usize,
    ) -> Result<Vec<u8>> {
        let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
        let mut buf = vec![0; len];
        let mut total_read = 0u64;
        // Handle reads across page boundaries.

        while total_read < len as u64 {
            // Re-translate at each step: contiguous virtual pages may map to
            // discontiguous physical pages.
            let (paddr, psize) = phys_addr(guest_mem, vaddr.0 + total_read, &sregs)?;
            // Read at most up to the end of the current physical page.
            let read_len = std::cmp::min(len as u64 - total_read, psize - (paddr & (psize - 1)));
            guest_mem
                .get_slice_at_addr(GuestAddress(paddr), read_len as usize)
                .map_err(Error::ReadingGuestMemory)?
                .copy_to(&mut buf[total_read as usize..]);
            total_read += read_len;
        }
        Ok(buf)
    }

    /// Writes `buf` into guest memory at virtual address `vaddr`, translating
    /// through the VCPU's current page tables.
    fn write_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        buf: &[u8],
    ) -> Result<()> {
        let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
        let mut total_written = 0u64;
        // Handle writes across page boundaries.
        while total_written < buf.len() as u64 {
            let (paddr, psize) = phys_addr(guest_mem, vaddr.0 + total_written, &sregs)?;
            // Write at most up to the end of the current physical page.
            let write_len = std::cmp::min(
                buf.len() as u64 - total_written,
                psize - (paddr & (psize - 1)),
            );

            guest_mem
                .write_all_at_addr(
                    &buf[total_written as usize..(total_written as usize + write_len as usize)],
                    GuestAddress(paddr),
                )
                .map_err(Error::WritingGuestMemory)?;
            total_written += write_len;
        }
        Ok(())
    }

    /// Enables single-step debugging on the VCPU (clears any HW breakpoints).
    fn enable_singlestep(vcpu: &T) -> Result<()> {
        vcpu.set_guest_debug(&[], true /* enable_singlestep */)
            .map_err(Error::EnableSinglestep)
    }

    /// x86 exposes four hardware debug address registers (DR0-DR3).
    fn get_max_hw_breakpoints(_vcpu: &T) -> Result<usize> {
        Ok(4usize)
    }

    /// Installs hardware breakpoints at `breakpoints` (disables single-step).
    fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
        vcpu.set_guest_debug(breakpoints, false /* enable_singlestep */)
            .map_err(Error::SetHwBreakpoint)
    }
}
1320
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
// return the translated address and the size of the page it resides in.
//
// Walks the guest's page tables (read from `mem` starting at CR3) to translate
// the virtual address `vaddr` under the paging mode described by `sregs`.
// Supports: paging disabled (identity mapping), and 4-level long-mode paging
// with 4K/2M/1G pages. 32-bit non-PAE paging and LA57 are not implemented.
fn phys_addr(mem: &GuestMemory, vaddr: u64, sregs: &Sregs) -> Result<(u64, u64)> {
    const CR0_PG_MASK: u64 = 1 << 31;
    const CR4_PAE_MASK: u64 = 1 << 5;
    const CR4_LA57_MASK: u64 = 1 << 12;
    const MSR_EFER_LMA: u64 = 1 << 10;
    // bits 12 through 51 are the address in a PTE.
    const PTE_ADDR_MASK: u64 = ((1 << 52) - 1) & !0x0fff;
    const PAGE_PRESENT: u64 = 0x1;
    const PAGE_PSE_MASK: u64 = 0x1 << 7;

    const PAGE_SIZE_4K: u64 = 4 * 1024;
    const PAGE_SIZE_2M: u64 = 2 * 1024 * 1024;
    const PAGE_SIZE_1G: u64 = 1024 * 1024 * 1024;

    // Reads the page-table entry for `vaddr` at the given `level` from the table
    // whose physical address is embedded in `curr_table_addr`, returning an error
    // if the entry is not present.
    fn next_pte(mem: &GuestMemory, curr_table_addr: u64, vaddr: u64, level: usize) -> Result<u64> {
        let ent: u64 = mem
            .read_obj_from_addr(GuestAddress(
                (curr_table_addr & PTE_ADDR_MASK) + page_table_offset(vaddr, level),
            ))
            .map_err(|_| Error::TranslatingVirtAddr)?;
        /* TODO - convert to a trace
        println!(
            "level {} vaddr {:x} table-addr {:x} mask {:x} ent {:x} offset {:x}",
            level,
            vaddr,
            curr_table_addr,
            PTE_ADDR_MASK,
            ent,
            page_table_offset(vaddr, level)
        );
        */
        if ent & PAGE_PRESENT == 0 {
            return Err(Error::PageNotPresent);
        }
        Ok(ent)
    }

    // Get the offset in to the page of `vaddr`.
    fn page_offset(vaddr: u64, page_size: u64) -> u64 {
        vaddr & (page_size - 1)
    }

    // Get the offset in to the page table of the given `level` specified by the virtual `address`.
    // `level` is 1 through 5 in x86_64 to handle the five levels of paging.
    // Each level consumes 9 bits of the virtual address, starting at bit 12;
    // the result is a byte offset (index << 3 for 8-byte entries).
    fn page_table_offset(addr: u64, level: usize) -> u64 {
        let offset = (level - 1) * 9 + 12;
        ((addr >> offset) & 0x1ff) << 3
    }

    // Paging disabled: virtual addresses are physical addresses.
    if sregs.cr0 & CR0_PG_MASK == 0 {
        return Ok((vaddr, PAGE_SIZE_4K));
    }

    // 32-bit non-PAE paging is not supported by this walker.
    if sregs.cr4 & CR4_PAE_MASK == 0 {
        return Err(Error::TranslatingVirtAddr);
    }

    if sregs.efer & MSR_EFER_LMA != 0 {
        // TODO - check LA57
        // NOTE(review): LA57 (5-level paging) is detected but not acted upon;
        // the walk below always starts at level 4, so translations would be
        // wrong for a guest with 5-level paging enabled — confirm such guests
        // are out of scope for the gdb stub.
        if sregs.cr4 & CR4_LA57_MASK != 0 {}
        let p4_ent = next_pte(mem, sregs.cr3, vaddr, 4)?;
        let p3_ent = next_pte(mem, p4_ent, vaddr, 3)?;
        if p3_ent & PAGE_PSE_MASK != 0 {
            // It's a 1G page with the PSE bit in p3_ent
            let paddr = p3_ent & PTE_ADDR_MASK | page_offset(vaddr, PAGE_SIZE_1G);
            return Ok((paddr, PAGE_SIZE_1G));
        }
        let p2_ent = next_pte(mem, p3_ent, vaddr, 2)?;
        if p2_ent & PAGE_PSE_MASK != 0 {
            // It's a 2M page with the PSE bit in p2_ent
            let paddr = p2_ent & PTE_ADDR_MASK | page_offset(vaddr, PAGE_SIZE_2M);
            return Ok((paddr, PAGE_SIZE_2M));
        }
        let p1_ent = next_pte(mem, p2_ent, vaddr, 1)?;
        let paddr = p1_ent & PTE_ADDR_MASK | page_offset(vaddr, PAGE_SIZE_4K);
        return Ok((paddr, PAGE_SIZE_4K));
    }
    // PAE paging without long mode active is not supported.
    Err(Error::TranslatingVirtAddr)
}
1403
// OSC returned status register in CDW1
const OSC_STATUS_UNSUPPORT_UUID: u32 = 0x4;
// pci host bridge OSC returned control register in CDW3
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_HP: u32 = 0x1; // native PCIe hotplug control
const PCI_HB_OSC_CONTROL_SHPC_HP: u32 = 0x2; // SHPC (standard hotplug controller)
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_PME: u32 = 0x4; // PCIe power management events
const PCI_HB_OSC_CONTROL_PCIE_AER: u32 = 0x8; // PCIe advanced error reporting
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_CAP: u32 = 0x10; // PCIe capability structure control

/// Marker type whose [`Aml`] implementation emits the PCI root bridge `_OSC`
/// (Operating System Capabilities) method.
struct PciRootOSC {}
1417
1418 // Method (_OSC, 4, NotSerialized) // _OSC: Operating System Capabilities
1419 // {
1420 // CreateDWordField (Arg3, Zero, CDW1) // flag and return value
1421 // If (Arg0 == ToUUID ("33db4d5b-1ff7-401c-9657-7441c03dd766"))
1422 // {
1423 // CreateDWordField (Arg3, 8, CDW3) // control field
1424 // if ( 0 == (CDW1 & 0x01)) // Query flag ?
1425 // {
1426 // CDW3 &= !(SHPC_HP | AER)
1427 // }
1428 // } Else {
1429 // CDW1 |= UNSUPPORT_UUID
1430 // }
1431 // Return (Arg3)
1432 // }
impl Aml for PciRootOSC {
    /// Appends the AML encoding of the PCI root `_OSC` method to `aml`.
    ///
    /// The generated bytecode corresponds to the ASL shown in the comment above:
    /// for the PCI host bridge UUID, a non-query call clears the SHPC-hotplug
    /// and AER bits in the control dword (CDW3); any other UUID sets the
    /// "unsupported UUID" status bit in CDW1. Arg3 is returned in all cases.
    fn to_aml_bytes(&self, aml: &mut Vec<u8>) {
        let osc_uuid = "33DB4D5B-1FF7-401C-9657-7441C03DD766";
        // virtual pcie root port supports hotplug, pme, and pcie cap register, clear all
        // the other bits.
        let mask = !(PCI_HB_OSC_CONTROL_SHPC_HP | PCI_HB_OSC_CONTROL_PCIE_AER);
        aml::Method::new(
            "_OSC".into(),
            4,     // four arguments, per the _OSC spec
            false, // NotSerialized
            vec![
                // CDW1 = first dword of the Arg3 buffer (flags in, status out).
                &aml::CreateDWordField::new(
                    &aml::Name::new_field_name("CDW1"),
                    &aml::Arg(3),
                    &aml::ZERO,
                ),
                &aml::If::new(
                    &aml::Equal::new(&aml::Arg(0), &aml::Uuid::new(osc_uuid)),
                    vec![
                        // CDW3 = control dword at byte offset 8 of Arg3.
                        &aml::CreateDWordField::new(
                            &aml::Name::new_field_name("CDW3"),
                            &aml::Arg(3),
                            &(8_u8),
                        ),
                        // If the query flag (CDW1 bit 0) is clear, grant controls:
                        // NOTE(review): operand order appears to be
                        // `And::new(result, lhs, rhs)` — the And below computes
                        // CDW1 & 1 with the result discarded into Zero, used only
                        // as the Equal operand; confirm against the acpi_tables
                        // aml crate documentation.
                        &aml::If::new(
                            &aml::Equal::new(
                                &aml::ZERO,
                                &aml::And::new(
                                    &aml::ZERO,
                                    &aml::Name::new_field_name("CDW1"),
                                    &aml::ONE,
                                ),
                            ),
                            // CDW3 &= mask (drop SHPC hotplug and AER control).
                            vec![&aml::And::new(
                                &aml::Name::new_field_name("CDW3"),
                                &mask,
                                &aml::Name::new_field_name("CDW3"),
                            )],
                        ),
                    ],
                ),
                // Unknown UUID: report it via the status dword.
                &aml::Else::new(vec![&aml::Or::new(
                    &aml::Name::new_field_name("CDW1"),
                    &OSC_STATUS_UNSUPPORT_UUID,
                    &aml::Name::new_field_name("CDW1"),
                )]),
                &aml::Return::new(&aml::Arg(3)),
            ],
        )
        .to_aml_bytes(aml)
    }
}
1485
1486 impl X8664arch {
1487 /// Loads the bios from an open file.
1488 ///
1489 /// # Arguments
1490 ///
1491 /// * `mem` - The memory to be used by the guest.
1492 /// * `bios_image` - the File object for the specified bios
load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()>1493 fn load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()> {
1494 let bios_image_length = bios_image
1495 .seek(io::SeekFrom::End(0))
1496 .map_err(Error::LoadBios)?;
1497 if bios_image_length >= FIRST_ADDR_PAST_32BITS {
1498 return Err(Error::LoadBios(io::Error::new(
1499 io::ErrorKind::InvalidData,
1500 format!(
1501 "bios was {} bytes, expected less than {}",
1502 bios_image_length, FIRST_ADDR_PAST_32BITS,
1503 ),
1504 )));
1505 }
1506 bios_image
1507 .seek(io::SeekFrom::Start(0))
1508 .map_err(Error::LoadBios)?;
1509 mem.read_to_memory(
1510 bios_start(bios_image_length),
1511 bios_image,
1512 bios_image_length as usize,
1513 )
1514 .map_err(Error::SetupGuestMemory)?;
1515 Ok(())
1516 }
1517
    /// Creates a Pflash (parallel flash) device backed by `pflash_image` and
    /// inserts it on the MMIO bus immediately below the BIOS region, i.e. ending
    /// at `FIRST_ADDR_PAST_32BITS - bios_size`.
    ///
    /// # Arguments
    ///
    /// * `pflash_image` - backing file for the flash contents
    /// * `block_size` - flash erase-block size in bytes
    /// * `bios_size` - size of the BIOS mapped directly below 4 GB
    /// * `mmio_bus` - bus the device is inserted on
    /// * `jail` - (unix) optional sandbox; the device runs in a `ProxyDevice`
    ///   child process when present
    /// * `swap_controller` - (swap feature) controller for vm-swap of the
    ///   jailed device process
    fn setup_pflash(
        pflash_image: File,
        block_size: u32,
        bios_size: u64,
        mmio_bus: &devices::Bus,
        jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
    ) -> Result<()> {
        let size = pflash_image.metadata().map_err(Error::LoadPflash)?.len();
        // Map the flash directly below the BIOS, which itself ends at 4 GB.
        let start = FIRST_ADDR_PAST_32BITS - bios_size - size;
        let pflash_image = Box::new(pflash_image);

        // Collect the backing file descriptors before the image is moved into
        // the device, so they can be kept open inside the jail.
        #[cfg(unix)]
        let fds = pflash_image.as_raw_descriptors();

        let pflash = Pflash::new(pflash_image, block_size).map_err(Error::SetupPflash)?;
        let pflash: Arc<Mutex<dyn BusDevice>> = match jail {
            #[cfg(unix)]
            Some(jail) => Arc::new(Mutex::new(
                ProxyDevice::new(
                    pflash,
                    jail,
                    fds,
                    #[cfg(feature = "swap")]
                    swap_controller,
                )
                .map_err(Error::CreateProxyDevice)?,
            )),
            // Jails are never constructed on Windows.
            #[cfg(windows)]
            Some(_) => unreachable!(),
            None => Arc::new(Mutex::new(pflash)),
        };
        mmio_bus
            .insert(pflash, start, size)
            .map_err(Error::InsertBus)?;

        Ok(())
    }
1556
1557 /// Loads the kernel from an open file.
1558 ///
1559 /// # Arguments
1560 ///
1561 /// * `mem` - The memory to be used by the guest.
1562 /// * `kernel_image` - the File object for the specified kernel.
1563 ///
1564 /// # Returns
1565 ///
1566 /// On success, returns the Linux x86_64 boot protocol parameters, the first address past the
1567 /// end of the kernel, and the entry point (initial `RIP` value).
load_kernel( mem: &GuestMemory, kernel_image: &mut File, ) -> Result<(boot_params, u64, GuestAddress)>1568 fn load_kernel(
1569 mem: &GuestMemory,
1570 kernel_image: &mut File,
1571 ) -> Result<(boot_params, u64, GuestAddress)> {
1572 let kernel_start = GuestAddress(KERNEL_START_OFFSET);
1573 match kernel_loader::load_elf64(mem, kernel_start, kernel_image, 0) {
1574 Ok(loaded_kernel) => {
1575 // ELF kernels don't contain a `boot_params` structure, so synthesize a default one.
1576 let boot_params = Default::default();
1577 Ok((
1578 boot_params,
1579 loaded_kernel.address_range.end,
1580 loaded_kernel.entry,
1581 ))
1582 }
1583 Err(kernel_loader::Error::InvalidMagicNumber) => {
1584 // The image failed to parse as ELF, so try to load it as a bzImage.
1585 let (boot_params, bzimage_end) =
1586 bzimage::load_bzimage(mem, kernel_start, kernel_image)
1587 .map_err(Error::LoadBzImage)?;
1588 let bzimage_entry = mem
1589 .checked_offset(kernel_start, KERNEL_64BIT_ENTRY_OFFSET)
1590 .ok_or(Error::KernelOffsetPastEnd)?;
1591 Ok((boot_params, bzimage_end, bzimage_entry))
1592 }
1593 Err(e) => Err(Error::LoadKernel(e)),
1594 }
1595 }
1596
    /// Configures the system memory space should be called once per vm before
    /// starting vcpu threads.
    ///
    /// Writes the kernel command line, the `setup_data` chain (a device tree
    /// blob when `android_fstab` is given, plus an RNG seed entry), and the
    /// optional initrd into guest memory, then fills in the Linux boot-protocol
    /// zero page via `configure_system`.
    ///
    /// # Arguments
    ///
    /// * `mem` - The memory to be used by the guest.
    /// * `cmdline` - the kernel commandline
    /// * `initrd_file` - an initial ramdisk image
    /// * `android_fstab` - optional fstab used to build an Android device tree blob
    /// * `kernel_end` - first guest address past the loaded kernel; the initrd
    ///   is placed above this
    /// * `params` - boot parameters taken from the loaded kernel image
    /// * `dump_device_tree_blob` - optional path the generated DTB is written to
    pub fn setup_system_memory(
        mem: &GuestMemory,
        cmdline: &CStr,
        initrd_file: Option<File>,
        android_fstab: Option<File>,
        kernel_end: u64,
        params: boot_params,
        dump_device_tree_blob: Option<PathBuf>,
    ) -> Result<()> {
        kernel_loader::load_cmdline(mem, GuestAddress(CMDLINE_OFFSET), cmdline)
            .map_err(Error::LoadCmdline)?;

        let mut setup_data = Vec::<SetupData>::new();
        if let Some(android_fstab) = android_fstab {
            // Expose the Android fstab to the guest as a device tree blob.
            setup_data.push(
                fdt::create_fdt(android_fstab, dump_device_tree_blob).map_err(Error::CreateFdt)?,
            );
        }
        // Always append an RNG seed setup_data entry.
        setup_data.push(setup_data_rng_seed());

        let setup_data = write_setup_data(
            mem,
            GuestAddress(SETUP_DATA_START),
            GuestAddress(SETUP_DATA_END),
            &setup_data,
        )?;

        let initrd = match initrd_file {
            Some(mut initrd_file) => {
                let mut initrd_addr_max = u64::from(params.hdr.initrd_addr_max);
                // Default initrd_addr_max for old kernels (see Documentation/x86/boot.txt).
                if initrd_addr_max == 0 {
                    initrd_addr_max = 0x37FFFFFF;
                }

                // Never place the initrd past the end of guest memory.
                let mem_max = mem.end_addr().offset() - 1;
                if initrd_addr_max > mem_max {
                    initrd_addr_max = mem_max;
                }

                // Load the initrd as high as possible within
                // [kernel_end, initrd_addr_max], page-aligned.
                let (initrd_start, initrd_size) = arch::load_image_high(
                    mem,
                    &mut initrd_file,
                    GuestAddress(kernel_end),
                    GuestAddress(initrd_addr_max),
                    base::pagesize() as u64,
                )
                .map_err(Error::LoadInitrd)?;
                Some((initrd_start, initrd_size))
            }
            None => None,
        };

        // Fill in the boot-protocol zero page (cmdline size counts the NUL).
        configure_system(
            mem,
            GuestAddress(KERNEL_START_OFFSET),
            GuestAddress(CMDLINE_OFFSET),
            cmdline.to_bytes().len() + 1,
            setup_data,
            initrd,
            params,
        )?;
        Ok(())
    }
1669
get_pcie_vcfg_mmio_range(mem: &GuestMemory, pcie_cfg_mmio: &AddressRange) -> AddressRange1670 fn get_pcie_vcfg_mmio_range(mem: &GuestMemory, pcie_cfg_mmio: &AddressRange) -> AddressRange {
1671 // Put PCIe VCFG region at a 2MB boundary after physical memory or 4gb, whichever is greater.
1672 let ram_end_round_2mb = (mem.end_addr().offset() + 2 * MB - 1) / (2 * MB) * (2 * MB);
1673 let start = std::cmp::max(ram_end_round_2mb, 4 * GB);
1674 // Each pci device's ECAM size is 4kb and its vcfg size is 8kb
1675 let end = start + pcie_cfg_mmio.len().unwrap() * 2 - 1;
1676 AddressRange { start, end }
1677 }
1678
1679 /// Returns the high mmio range
get_high_mmio_range<V: Vm>(vm: &V) -> AddressRange1680 fn get_high_mmio_range<V: Vm>(vm: &V) -> AddressRange {
1681 let mem = vm.get_memory();
1682 let start = Self::get_pcie_vcfg_mmio_range(mem, &read_pcie_cfg_mmio()).end + 1;
1683
1684 let phys_mem_end = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
1685 let high_mmio_end = std::cmp::min(phys_mem_end, HIGH_MMIO_MAX_END);
1686
1687 AddressRange {
1688 start,
1689 end: high_mmio_end,
1690 }
1691 }
1692
1693 /// This returns a minimal kernel command for this architecture
get_base_linux_cmdline() -> kernel_cmdline::Cmdline1694 pub fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
1695 let mut cmdline = kernel_cmdline::Cmdline::new(CMDLINE_MAX_SIZE as usize);
1696 cmdline.insert_str("panic=-1").unwrap();
1697
1698 cmdline
1699 }
1700
1701 /// Sets up the legacy x86 i8042/KBD platform device
1702 ///
1703 /// # Arguments
1704 ///
1705 /// * - `io_bus` - the IO bus object
1706 /// * - `pit_uses_speaker_port` - does the PIT use port 0x61 for the PC speaker
1707 /// * - `vm_evt_wrtube` - the event object which should receive exit events
setup_legacy_i8042_device( io_bus: &devices::Bus, pit_uses_speaker_port: bool, vm_evt_wrtube: SendTube, ) -> Result<()>1708 pub fn setup_legacy_i8042_device(
1709 io_bus: &devices::Bus,
1710 pit_uses_speaker_port: bool,
1711 vm_evt_wrtube: SendTube,
1712 ) -> Result<()> {
1713 let i8042 = Arc::new(Mutex::new(devices::I8042Device::new(
1714 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
1715 )));
1716
1717 if pit_uses_speaker_port {
1718 io_bus.insert(i8042, 0x062, 0x3).unwrap();
1719 } else {
1720 io_bus.insert(i8042, 0x061, 0x4).unwrap();
1721 }
1722
1723 Ok(())
1724 }
1725
1726 /// Sets up the legacy x86 CMOS/RTC platform device
1727 /// # Arguments
1728 ///
1729 /// * - `io_bus` - the IO bus object
1730 /// * - `mem_size` - the size in bytes of physical ram for the guest
setup_legacy_cmos_device( io_bus: &devices::Bus, irq_chip: &mut dyn IrqChipX86_64, vm_control: Tube, mem_size: u64, ) -> anyhow::Result<()>1731 pub fn setup_legacy_cmos_device(
1732 io_bus: &devices::Bus,
1733 irq_chip: &mut dyn IrqChipX86_64,
1734 vm_control: Tube,
1735 mem_size: u64,
1736 ) -> anyhow::Result<()> {
1737 let mem_regions = arch_memory_regions(mem_size, None);
1738
1739 let mem_below_4g = mem_regions
1740 .iter()
1741 .filter(|r| r.0.offset() < FIRST_ADDR_PAST_32BITS)
1742 .map(|r| r.1)
1743 .sum();
1744
1745 let mem_above_4g = mem_regions
1746 .iter()
1747 .filter(|r| r.0.offset() >= FIRST_ADDR_PAST_32BITS)
1748 .map(|r| r.1)
1749 .sum();
1750
1751 let irq_evt = devices::IrqEdgeEvent::new().context("cmos irq")?;
1752 let cmos = devices::cmos::Cmos::new(
1753 mem_below_4g,
1754 mem_above_4g,
1755 Utc::now,
1756 vm_control,
1757 irq_evt.try_clone().context("cmos irq clone")?,
1758 )
1759 .context("create cmos")?;
1760
1761 irq_chip
1762 .register_edge_irq_event(
1763 devices::cmos::RTC_IRQ as u32,
1764 &irq_evt,
1765 IrqEventSource::from_device(&cmos),
1766 )
1767 .context("cmos register irq")?;
1768 io_bus
1769 .insert(Arc::new(Mutex::new(cmos)), 0x70, 0x2)
1770 .context("cmos insert irq")?;
1771
1772 Ok(())
1773 }
1774
1775 /// Sets up the acpi devices for this platform and
1776 /// return the resources which is used to set the ACPI tables.
1777 ///
1778 /// # Arguments
1779 ///
1780 /// * - `io_bus` the I/O bus to add the devices to
1781 /// * - `resources` the SystemAllocator to allocate IO and MMIO for acpi
1782 /// devices.
1783 /// * - `suspend_evt` the event object which used to suspend the vm
1784 /// * - `sdts` ACPI system description tables
1785 /// * - `irq_chip` the IrqChip object for registering irq events
1786 /// * - `battery` indicate whether to create the battery
1787 /// * - `mmio_bus` the MMIO bus to add the devices to
    pub fn setup_acpi_devices(
        pci_root: Arc<Mutex<PciRoot>>,
        mem: &GuestMemory,
        io_bus: &devices::Bus,
        resources: &mut SystemAllocator,
        suspend_evt: Event,
        vm_evt_wrtube: SendTube,
        sdts: Vec<SDT>,
        #[cfg(feature = "direct")] direct_gpe: &[u32],
        #[cfg(feature = "direct")] direct_fixed_evts: &[devices::ACPIPMFixedEvent],
        irq_chip: &mut dyn IrqChip,
        sci_irq: u32,
        battery: (Option<BatteryType>, Option<Minijail>),
        #[cfg_attr(windows, allow(unused_variables))] mmio_bus: &devices::Bus,
        max_bus: u8,
        resume_notify_devices: &mut Vec<Arc<Mutex<dyn BusResumeDevice>>>,
        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
        #[cfg(unix)] ac_adapter: bool,
    ) -> Result<(acpi::AcpiDevResource, Option<BatControl>)> {
        // The AML data for the acpi devices
        let mut amls = Vec::new();

        // Create the requested battery device. Only the goldfish battery is
        // supported, and only on unix; its control tube is returned to the
        // caller so battery state can be driven later.
        let bat_control = if let Some(battery_type) = battery.0 {
            match battery_type {
                #[cfg(unix)]
                BatteryType::Goldfish => {
                    let irq_num = resources.allocate_irq().ok_or(Error::CreateBatDevices(
                        arch::DeviceRegistrationError::AllocateIrq,
                    ))?;
                    let (control_tube, _mmio_base) = arch::sys::unix::add_goldfish_battery(
                        &mut amls,
                        battery.1,
                        mmio_bus,
                        irq_chip,
                        irq_num,
                        resources,
                        #[cfg(feature = "swap")]
                        swap_controller,
                    )
                    .map_err(Error::CreateBatDevices)?;
                    Some(BatControl {
                        type_: BatteryType::Goldfish,
                        control_tube,
                    })
                }
                #[cfg(windows)]
                _ => None,
            }
        } else {
            None
        };

        // Reserve I/O ports for the ACPI PM register block; fall back to the
        // fixed base 0x600 when no I/O allocator is available.
        let pm_alloc = resources.get_anon_alloc();
        let pm_iobase = match resources.io_allocator() {
            Some(io) => io
                .allocate_with_align(
                    devices::acpi::ACPIPM_RESOURCE_LEN as u64,
                    pm_alloc,
                    "ACPIPM".to_string(),
                    4, // must be 32-bit aligned
                )
                .map_err(Error::AllocateIOResouce)?,
            None => 0x600,
        };

        // Expose the start of the PCIe vCFG MMIO range to the guest under the
        // AML name "VCFG".
        let pcie_vcfg = aml::Name::new(
            "VCFG".into(),
            &Self::get_pcie_vcfg_mmio_range(mem, &read_pcie_cfg_mmio()).start,
        );
        pcie_vcfg.to_aml_bytes(&mut amls);

        // With the "direct" feature, forward the requested host GPEs and
        // fixed events into the guest via a dedicated level-triggered SCI.
        #[cfg(feature = "direct")]
        let direct_evt_info = if direct_gpe.is_empty() && direct_fixed_evts.is_empty() {
            None
        } else {
            let direct_sci_evt = devices::IrqLevelEvent::new().map_err(Error::CreateEvent)?;
            let mut sci_devirq =
                devices::DirectIrq::new_level(&direct_sci_evt).map_err(Error::EnableAcpiEvent)?;

            sci_devirq
                .sci_irq_prepare()
                .map_err(Error::EnableAcpiEvent)?;

            for gpe in direct_gpe {
                sci_devirq
                    .gpe_enable_forwarding(*gpe)
                    .map_err(Error::EnableAcpiEvent)?;
            }

            for evt in direct_fixed_evts {
                sci_devirq
                    .fixed_event_enable_forwarding(*evt)
                    .map_err(Error::EnableAcpiEvent)?;
            }

            Some((direct_sci_evt, direct_gpe, direct_fixed_evts))
        };

        let pm_sci_evt = devices::IrqLevelEvent::new().map_err(Error::CreateEvent)?;

        // Optionally create the virtual AC adapter device on its own MMIO
        // region with a GPE for notifications (unix only).
        #[cfg(unix)]
        let acdc = if ac_adapter {
            // Allocate GPE for AC adapter notification
            let gpe = resources.allocate_gpe().ok_or(Error::AllocateGpe)?;

            let alloc = resources.get_anon_alloc();
            let mmio_base = resources
                .allocate_mmio(
                    devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
                    alloc,
                    "AcAdapter".to_string(),
                    resources::AllocOptions::new().align(devices::ac_adapter::ACDC_VIRT_MMIO_SIZE),
                )
                .unwrap();
            let ac_adapter_dev = devices::ac_adapter::AcAdapter::new(mmio_base, gpe);
            let ac_dev = Arc::new(Mutex::new(ac_adapter_dev));
            mmio_bus
                .insert(
                    ac_dev.clone(),
                    mmio_base,
                    devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
                )
                .unwrap();

            ac_dev.lock().to_aml_bytes(&mut amls);
            Some(ac_dev)
        } else {
            None
        };
        #[cfg(windows)]
        let acdc = None;

        // Build the ACPI PM resource device, register its SCI with the irq
        // chip, and start it.
        let mut pmresource = devices::ACPIPMResource::new(
            pm_sci_evt.try_clone().map_err(Error::CloneEvent)?,
            #[cfg(feature = "direct")]
            direct_evt_info,
            suspend_evt,
            vm_evt_wrtube,
            acdc,
        );
        pmresource.to_aml_bytes(&mut amls);
        irq_chip
            .register_level_irq_event(
                sci_irq,
                &pm_sci_evt,
                IrqEventSource::from_device(&pmresource),
            )
            .map_err(Error::RegisterIrqfd)?;
        pmresource.start();

        // Describe the PCI root's resources (_CRS): bus numbers, the config
        // ports, and every MMIO pool. Pools whose bounds fit in u32 are
        // emitted as 32-bit address-space entries, others as 64-bit entries.
        let mut crs_entries: Vec<Box<dyn Aml>> = vec![
            Box::new(aml::AddressSpace::new_bus_number(0x0u16, max_bus as u16)),
            Box::new(aml::IO::new(0xcf8, 0xcf8, 1, 0x8)),
        ];
        for r in resources.mmio_pools() {
            let entry: Box<dyn Aml> = match (u32::try_from(r.start), u32::try_from(r.end)) {
                (Ok(start), Ok(end)) => Box::new(aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    start,
                    end,
                )),
                _ => Box::new(aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    r.start,
                    r.end,
                )),
            };
            crs_entries.push(entry);
        }

        // Emit the PCI host bridge device (_SB_.PC00) with its _CRS and _OSC.
        aml::Device::new(
            "_SB_.PC00".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A08")),
                &aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A03")),
                &aml::Name::new("_ADR".into(), &aml::ZERO),
                &aml::Name::new("_SEG".into(), &aml::ZERO),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("SUPP".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(crs_entries.iter().map(|b| b.as_ref()).collect()),
                ),
                &PciRootOSC {},
            ],
        )
        .to_aml_bytes(&mut amls);

        // Emit an ACPI device node (with its _ADR) for every root-bus PCI
        // device that has an ACPI path.
        let root_bus = pci_root.lock().get_root_bus();
        let addresses = root_bus.lock().get_downstream_devices();
        for address in addresses {
            if let Some(acpi_path) = pci_root.lock().acpi_path(&address) {
                aml::Device::new(
                    (*acpi_path).into(),
                    vec![&aml::Name::new("_ADR".into(), &address.acpi_adr())],
                )
                .to_aml_bytes(&mut amls);
            }
        }

        // Attach the PM register block to the I/O bus and register it for
        // resume notifications.
        let pm = Arc::new(Mutex::new(pmresource));
        io_bus
            .insert(
                pm.clone(),
                pm_iobase as u64,
                devices::acpi::ACPIPM_RESOURCE_LEN as u64,
            )
            .unwrap();
        resume_notify_devices.push(pm.clone());

        Ok((
            acpi::AcpiDevResource {
                amls,
                pm_iobase,
                pm,
                sdts,
            },
            bat_control,
        ))
    }
2010
    /// Sets up the serial devices for this platform and registers their
    /// interrupt events with the irq chip.
2013 ///
2014 /// # Arguments
2015 ///
2016 /// * - `irq_chip` the IrqChip object for registering irq events
2017 /// * - `io_bus` the I/O bus to add the devices to
    /// * - `serial_parameters` - definitions for how the serial devices should be configured
setup_serial_devices( protection_type: ProtectionType, irq_chip: &mut dyn IrqChip, io_bus: &devices::Bus, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, ) -> Result<()>2019 pub fn setup_serial_devices(
2020 protection_type: ProtectionType,
2021 irq_chip: &mut dyn IrqChip,
2022 io_bus: &devices::Bus,
2023 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
2024 serial_jail: Option<Minijail>,
2025 #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
2026 ) -> Result<()> {
2027 let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2028 let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2029
2030 arch::add_serial_devices(
2031 protection_type,
2032 io_bus,
2033 com_evt_1_3.get_trigger(),
2034 com_evt_2_4.get_trigger(),
2035 serial_parameters,
2036 serial_jail,
2037 #[cfg(feature = "swap")]
2038 swap_controller,
2039 )
2040 .map_err(Error::CreateSerialDevices)?;
2041
2042 let source = IrqEventSource {
2043 device_id: Serial::device_id(),
2044 queue_id: 0,
2045 device_name: Serial::debug_label(),
2046 };
2047 irq_chip
2048 .register_edge_irq_event(X86_64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
2049 .map_err(Error::RegisterIrqfd)?;
2050 irq_chip
2051 .register_edge_irq_event(X86_64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
2052 .map_err(Error::RegisterIrqfd)?;
2053
2054 Ok(())
2055 }
2056
    /// Creates a debug-console device for every serial parameter entry with
    /// hardware type `Debugcon` and inserts it on the I/O bus at its
    /// configured port, optionally wrapped in a jailed proxy (unix only).
    fn setup_debugcon_devices(
        protection_type: ProtectionType,
        io_bus: &devices::Bus,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        debugcon_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
    ) -> Result<()> {
        for param in serial_parameters.values() {
            // Only entries explicitly configured as debugcon are handled here.
            if param.hardware != SerialHardware::Debugcon {
                continue;
            }

            let mut preserved_fds = Vec::new();
            let con = param
                .create_serial_device::<Debugcon>(
                    protection_type,
                    // Debugcon doesn't use the interrupt event
                    &Event::new().map_err(Error::CreateEvent)?,
                    &mut preserved_fds,
                )
                .map_err(Error::CreateDebugconDevice)?;

            // When a jail is provided (unix only), run the device behind a
            // ProxyDevice in a cloned jail; otherwise use it in-process.
            let con: Arc<Mutex<dyn BusDevice>> = match debugcon_jail.as_ref() {
                #[cfg(unix)]
                Some(jail) => {
                    let jail_clone = jail.try_clone().map_err(Error::CloneJail)?;
                    #[cfg(feature = "seccomp_trace")]
                    debug!(
                        "seccomp_trace {{\"event\": \"minijail_clone\", \"src_jail_addr\": \"0x{:x}\", \"dst_jail_addr\": \"0x{:x}\"}}",
                        read_jail_addr(jail),
                        read_jail_addr(&jail_clone)
                    );
                    Arc::new(Mutex::new(
                        ProxyDevice::new(
                            con,
                            jail_clone,
                            preserved_fds,
                            #[cfg(feature = "swap")]
                            swap_controller,
                        )
                        .map_err(Error::CreateProxyDevice)?,
                    ))
                }
                #[cfg(windows)]
                Some(_) => unreachable!(),
                None => Arc::new(Mutex::new(con)),
            };
            io_bus
                .insert(con.clone(), param.debugcon_port.into(), 1)
                .map_err(Error::InsertBus)?;
        }

        Ok(())
    }
2111 }
2112
2113 #[sorted]
2114 #[derive(Error, Debug)]
2115 pub enum MsrError {
2116 #[error("CPU not support. Only intel CPUs support ITMT.")]
2117 CpuUnSupport,
2118 #[error("msr must be unique: {0}")]
2119 MsrDuplicate(u32),
2120 }
2121
insert_msr( msr_map: &mut BTreeMap<u32, MsrConfig>, key: u32, msr_config: MsrConfig, ) -> std::result::Result<(), MsrError>2122 fn insert_msr(
2123 msr_map: &mut BTreeMap<u32, MsrConfig>,
2124 key: u32,
2125 msr_config: MsrConfig,
2126 ) -> std::result::Result<(), MsrError> {
2127 if msr_map.insert(key, msr_config).is_some() {
2128 Err(MsrError::MsrDuplicate(key))
2129 } else {
2130 Ok(())
2131 }
2132 }
2133
insert_msrs( msr_map: &mut BTreeMap<u32, MsrConfig>, msrs: &[(u32, MsrRWType, MsrAction, MsrValueFrom, MsrFilter)], ) -> std::result::Result<(), MsrError>2134 fn insert_msrs(
2135 msr_map: &mut BTreeMap<u32, MsrConfig>,
2136 msrs: &[(u32, MsrRWType, MsrAction, MsrValueFrom, MsrFilter)],
2137 ) -> std::result::Result<(), MsrError> {
2138 for msr in msrs {
2139 insert_msr(
2140 msr_map,
2141 msr.0,
2142 MsrConfig {
2143 rw_type: msr.1,
2144 action: msr.2,
2145 from: msr.3,
2146 filter: msr.4,
2147 },
2148 )?;
2149 }
2150
2151 Ok(())
2152 }
2153
set_enable_pnp_data_msr_config( msr_map: &mut BTreeMap<u32, MsrConfig>, ) -> std::result::Result<(), MsrError>2154 pub fn set_enable_pnp_data_msr_config(
2155 msr_map: &mut BTreeMap<u32, MsrConfig>,
2156 ) -> std::result::Result<(), MsrError> {
2157 let msrs = vec![
2158 (
2159 MSR_IA32_APERF,
2160 MsrRWType::ReadOnly,
2161 MsrAction::MsrPassthrough,
2162 MsrValueFrom::RWFromRunningCPU,
2163 MsrFilter::Default,
2164 ),
2165 (
2166 MSR_IA32_MPERF,
2167 MsrRWType::ReadOnly,
2168 MsrAction::MsrPassthrough,
2169 MsrValueFrom::RWFromRunningCPU,
2170 MsrFilter::Default,
2171 ),
2172 ];
2173
2174 insert_msrs(msr_map, &msrs)?;
2175
2176 Ok(())
2177 }
2178
/// Error returned by `check_host_hybrid_support` when the host CPU is not a
/// hybrid (heterogeneous core) part.
#[derive(Error, Debug)]
pub enum HybridSupportError {
    #[error("Host CPU doesn't support hybrid architecture.")]
    UnsupportedHostCpu,
}
2184
/// The wrapper for CPUID call functions.
///
/// Holding the functions indirectly, rather than invoking the intrinsics
/// directly, lets tests substitute fake implementations.
pub struct CpuIdCall {
    /// __cpuid_count or a fake function for test.
    cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
    /// __cpuid or a fake function for test.
    cpuid: unsafe fn(u32) -> CpuidResult,
}

impl CpuIdCall {
    /// Creates a `CpuIdCall` from the given CPUID functions.
    ///
    /// # Arguments
    ///
    /// * `cpuid_count` - a function behaving like `__cpuid_count`.
    /// * `cpuid` - a function behaving like `__cpuid`.
    pub fn new(
        cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
        cpuid: unsafe fn(u32) -> CpuidResult,
    ) -> CpuIdCall {
        Self { cpuid_count, cpuid }
    }
}
2201
2202 /// Check if host supports hybrid CPU feature. The check include:
2203 /// 1. Check if CPUID.1AH exists. CPUID.1AH is hybrid information enumeration leaf.
2204 /// 2. Check if CPUID.07H.00H:EDX[bit 15] sets. This bit means the processor is
2205 /// identified as a hybrid part.
2206 /// 3. Check if CPUID.1AH:EAX sets. The hybrid core type is set in EAX.
2207 ///
2208 /// # Arguments
2209 ///
2210 /// * - `cpuid` the wrapped cpuid functions used to get CPUID info.
check_host_hybrid_support(cpuid: &CpuIdCall) -> std::result::Result<(), HybridSupportError>2211 pub fn check_host_hybrid_support(cpuid: &CpuIdCall) -> std::result::Result<(), HybridSupportError> {
2212 // CPUID.0H.EAX returns maximum input value for basic CPUID information.
2213 //
2214 // Safe because we pass 0 for this call and the host supports the
2215 // `cpuid` instruction.
2216 let mut cpuid_entry = unsafe { (cpuid.cpuid)(0x0) };
2217 if cpuid_entry.eax < 0x1A {
2218 return Err(HybridSupportError::UnsupportedHostCpu);
2219 }
2220 // Safe because we pass 0x7 and 0 for this call and the host supports the
2221 // `cpuid` instruction.
2222 cpuid_entry = unsafe { (cpuid.cpuid_count)(0x7, 0) };
2223 if cpuid_entry.edx & 1 << EDX_HYBRID_CPU_SHIFT == 0 {
2224 return Err(HybridSupportError::UnsupportedHostCpu);
2225 }
2226 // From SDM, if a value entered for CPUID.EAX is less than or equal to the
2227 // maximum input value and the leaf is not supported on that processor then
2228 // 0 is returned in all the registers.
2229 // For the CPU with hybrid support, its CPUID.1AH.EAX shouldn't be zero.
2230 //
2231 // Safe because we pass 0 for this call and the host supports the
2232 // `cpuid` instruction.
2233 cpuid_entry = unsafe { (cpuid.cpuid)(0x1A) };
2234 if cpuid_entry.eax == 0 {
2235 return Err(HybridSupportError::UnsupportedHostCpu);
2236 }
2237 Ok(())
2238 }
2239
#[cfg(test)]
mod tests {
    use std::mem::size_of;

    use super::*;

    const TEST_MEMORY_SIZE: u64 = 2 * GB;

    /// Initializes the low-memory layout globals shared by the tests below:
    /// PCIe ECAM at 3 GB (256 MB long) and the PCI hole starting at 2 GB.
    fn setup() {
        let pcie_ecam = Some(AddressRange::from_start_and_size(3 * GB, 256 * MB).unwrap());
        let pci_start = Some(2 * GB);
        init_low_memory_layout(pcie_ecam, pci_start);
    }

    #[test]
    fn regions_lt_4gb_nobios() {
        setup();
        let regions = arch_memory_regions(512 * MB, /* bios_size */ None);
        assert_eq!(1, regions.len());
        assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
        assert_eq!(1u64 << 29, regions[0].1);
    }

    #[test]
    fn regions_gt_4gb_nobios() {
        setup();
        let size = 4 * GB + 0x8000;
        let regions = arch_memory_regions(size, /* bios_size */ None);
        assert_eq!(2, regions.len());
        assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
        assert_eq!(GuestAddress(4 * GB), regions[1].0);
        // The two regions together must account for all requested memory.
        assert_eq!(4 * GB + 0x8000, regions[0].1 + regions[1].1);
    }

    #[test]
    fn regions_lt_4gb_bios() {
        setup();
        let bios_len = 1 * MB;
        let regions = arch_memory_regions(512 * MB, Some(bios_len));
        assert_eq!(2, regions.len());
        assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
        assert_eq!(512 * MB, regions[0].1);
        // The BIOS region ends exactly at the 4 GB boundary.
        assert_eq!(
            GuestAddress(FIRST_ADDR_PAST_32BITS - bios_len),
            regions[1].0
        );
        assert_eq!(bios_len, regions[1].1);
    }

    #[test]
    fn regions_gt_4gb_bios() {
        setup();
        let bios_len = 1 * MB;
        let regions = arch_memory_regions(4 * GB + 0x8000, Some(bios_len));
        assert_eq!(3, regions.len());
        assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
        assert_eq!(
            GuestAddress(FIRST_ADDR_PAST_32BITS - bios_len),
            regions[1].0
        );
        assert_eq!(bios_len, regions[1].1);
        assert_eq!(GuestAddress(4 * GB), regions[2].0);
    }

    #[test]
    fn regions_eq_4gb_nobios() {
        setup();
        // Test with memory sized to exactly fill the area below the start of
        // the PCI hole (TEST_MEMORY_SIZE minus the RAM start offset).
        let regions = arch_memory_regions(
            TEST_MEMORY_SIZE - START_OF_RAM_32BITS,
            /* bios_size */ None,
        );
        assert_eq!(1, regions.len());
        assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
        assert_eq!(TEST_MEMORY_SIZE - START_OF_RAM_32BITS, regions[0].1);
    }

    #[test]
    fn regions_eq_4gb_bios() {
        setup();
        // Same layout as above, plus a BIOS mapped just below 4 GB.
        let bios_len = 1 * MB;
        let regions = arch_memory_regions(TEST_MEMORY_SIZE - START_OF_RAM_32BITS, Some(bios_len));
        assert_eq!(2, regions.len());
        assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
        assert_eq!(TEST_MEMORY_SIZE - START_OF_RAM_32BITS, regions[0].1);
        assert_eq!(
            GuestAddress(FIRST_ADDR_PAST_32BITS - bios_len),
            regions[1].0
        );
        assert_eq!(bios_len, regions[1].1);
    }

    #[test]
    fn check_pci_mmio_layout() {
        setup();

        assert_eq!(read_pci_mmio_before_32bit().start, 2 * GB);
        assert_eq!(read_pcie_cfg_mmio().start, 3 * GB);
        assert_eq!(read_pcie_cfg_mmio().len().unwrap(), 256 * MB);
    }

    #[test]
    #[cfg(feature = "direct")]
    #[ignore] // TODO(b/236253615): Fix and re-enable this test.
    fn end_addr_before_32bits() {
        setup();
        // On volteer, type16 (coreboot) region is at 0x00000000769f3000-0x0000000076ffffff.
        // On brya, type16 region is at 0x0000000076876000-0x00000000803fffff
        let brya_type16_address = 0x7687_6000;
        assert!(
            read_pci_mmio_before_32bit().start < brya_type16_address,
            "{} < {}",
            read_pci_mmio_before_32bit().start,
            brya_type16_address
        );
    }

    #[test]
    fn check_32bit_gap_size_alignment() {
        setup();
        // pci_low_start is 256 MB aligned to be friendly for MTRR mappings.
        assert_eq!(read_pci_mmio_before_32bit().start % (256 * MB), 0);
    }

    #[test]
    fn write_setup_data_empty() {
        let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();
        let setup_data = [];
        let setup_data_addr = write_setup_data(
            &mem,
            GuestAddress(0x1000),
            GuestAddress(0x2000),
            &setup_data,
        )
        .expect("write_setup_data");
        // No entries means no setup_data list head.
        assert_eq!(setup_data_addr, None);
    }

    #[test]
    fn write_setup_data_two_of_them() {
        let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();

        // Expected layout of the first entry: header (next ptr at +0, len at
        // +12) followed by its payload at +16.
        let entry1_addr = GuestAddress(0x1000);
        let entry1_next_addr = entry1_addr;
        let entry1_len_addr = entry1_addr.checked_add(12).unwrap();
        let entry1_data_addr = entry1_addr.checked_add(16).unwrap();
        let entry1_data = [0x55u8; 13];
        let entry1_size = (size_of::<setup_data_hdr>() + entry1_data.len()) as u64;
        // Padding expected between the entries so the second one is aligned.
        let entry1_align = 3;

        let entry2_addr = GuestAddress(entry1_addr.offset() + entry1_size + entry1_align);
        let entry2_next_addr = entry2_addr;
        let entry2_len_addr = entry2_addr.checked_add(12).unwrap();
        let entry2_data_addr = entry2_addr.checked_add(16).unwrap();
        let entry2_data = [0xAAu8; 9];

        let setup_data = [
            SetupData {
                data: entry1_data.to_vec(),
                type_: SetupDataType::Dtb,
            },
            SetupData {
                data: entry2_data.to_vec(),
                type_: SetupDataType::Dtb,
            },
        ];

        let setup_data_head_addr = write_setup_data(
            &mem,
            GuestAddress(0x1000),
            GuestAddress(0x2000),
            &setup_data,
        )
        .expect("write_setup_data");
        assert_eq!(setup_data_head_addr, Some(entry1_addr));

        // First entry links to the second and carries its own payload.
        assert_eq!(
            mem.read_obj_from_addr::<u64>(entry1_next_addr).unwrap(),
            entry2_addr.offset()
        );
        assert_eq!(
            mem.read_obj_from_addr::<u32>(entry1_len_addr).unwrap(),
            entry1_data.len() as u32
        );
        assert_eq!(
            mem.read_obj_from_addr::<[u8; 13]>(entry1_data_addr)
                .unwrap(),
            entry1_data
        );

        // Second (last) entry terminates the list with a null next pointer.
        assert_eq!(mem.read_obj_from_addr::<u64>(entry2_next_addr).unwrap(), 0);
        assert_eq!(
            mem.read_obj_from_addr::<u32>(entry2_len_addr).unwrap(),
            entry2_data.len() as u32
        );
        assert_eq!(
            mem.read_obj_from_addr::<[u8; 9]>(entry2_data_addr).unwrap(),
            entry2_data
        );
    }
}
2443