• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! x86 architecture support.
6 
7 #![cfg(target_arch = "x86_64")]
8 
9 mod fdt;
10 
11 #[cfg(feature = "gdb")]
12 mod gdb;
13 
// `setup_data` type tag used as the discriminant of `SetupDataType::Dtb`.
const SETUP_DTB: u32 = 2;
// `setup_data` type tag used as the discriminant of `SetupDataType::RngSeed`.
const SETUP_RNG_SEED: u32 = 9;
16 
17 #[allow(dead_code)]
18 #[allow(non_upper_case_globals)]
19 #[allow(non_camel_case_types)]
20 #[allow(non_snake_case)]
21 pub mod bootparam;
22 
23 #[allow(dead_code)]
24 #[allow(non_upper_case_globals)]
25 mod msr_index;
26 
27 #[allow(dead_code)]
28 #[allow(non_upper_case_globals)]
29 #[allow(non_camel_case_types)]
30 #[allow(clippy::all)]
31 mod mpspec;
32 
33 pub mod multiboot_spec;
34 
35 pub mod acpi;
36 mod bzimage;
37 pub mod cpuid;
38 mod gdt;
39 pub mod interrupts;
40 pub mod mptable;
41 pub mod regs;
42 pub mod smbios;
43 
44 use std::arch::x86_64::CpuidResult;
45 use std::collections::BTreeMap;
46 use std::fmt;
47 use std::fs::File;
48 use std::io;
49 use std::io::Write;
50 use std::mem;
51 use std::path::PathBuf;
52 use std::sync::mpsc;
53 use std::sync::Arc;
54 
55 use acpi_tables::aml;
56 use acpi_tables::aml::Aml;
57 use acpi_tables::sdt::SDT;
58 use anyhow::Context;
59 use arch::get_serial_cmdline;
60 use arch::serial::SerialDeviceInfo;
61 use arch::CpuSet;
62 use arch::DtbOverlay;
63 use arch::FdtPosition;
64 use arch::GetSerialCmdlineError;
65 use arch::MemoryRegionConfig;
66 use arch::PciConfig;
67 use arch::RunnableLinuxVm;
68 use arch::VmComponents;
69 use arch::VmImage;
70 use base::debug;
71 use base::info;
72 use base::warn;
73 #[cfg(any(target_os = "android", target_os = "linux"))]
74 use base::AsRawDescriptors;
75 use base::Event;
76 use base::FileGetLen;
77 use base::FileReadWriteAtVolatile;
78 use base::SendTube;
79 use base::Tube;
80 use base::TubeError;
81 use chrono::Utc;
82 pub use cpuid::adjust_cpuid;
83 pub use cpuid::CpuIdContext;
84 use devices::acpi::PM_WAKEUP_GPIO;
85 use devices::Bus;
86 use devices::BusDevice;
87 use devices::BusDeviceObj;
88 use devices::BusResumeDevice;
89 use devices::BusType;
90 use devices::Debugcon;
91 use devices::FwCfgParameters;
92 use devices::IrqChip;
93 use devices::IrqChipX86_64;
94 use devices::IrqEventSource;
95 use devices::PciAddress;
96 use devices::PciConfigIo;
97 use devices::PciConfigMmio;
98 use devices::PciDevice;
99 use devices::PciInterruptPin;
100 use devices::PciRoot;
101 use devices::PciRootCommand;
102 use devices::PciVirtualConfigMmio;
103 use devices::Pflash;
104 #[cfg(any(target_os = "android", target_os = "linux"))]
105 use devices::ProxyDevice;
106 use devices::Serial;
107 use devices::SerialHardware;
108 use devices::SerialParameters;
109 use devices::VirtualPmc;
110 use devices::FW_CFG_BASE_PORT;
111 use devices::FW_CFG_MAX_FILE_SLOTS;
112 use devices::FW_CFG_WIDTH;
113 use hypervisor::CpuConfigX86_64;
114 use hypervisor::Hypervisor;
115 use hypervisor::HypervisorX86_64;
116 use hypervisor::ProtectionType;
117 use hypervisor::VcpuInitX86_64;
118 use hypervisor::VcpuX86_64;
119 use hypervisor::Vm;
120 use hypervisor::VmCap;
121 use hypervisor::VmX86_64;
122 #[cfg(feature = "seccomp_trace")]
123 use jail::read_jail_addr;
124 #[cfg(windows)]
125 use jail::FakeMinijailStub as Minijail;
126 #[cfg(any(target_os = "android", target_os = "linux"))]
127 use minijail::Minijail;
128 use mptable::MPTABLE_START;
129 use multiboot_spec::MultibootInfo;
130 use multiboot_spec::MultibootMmapEntry;
131 use multiboot_spec::MULTIBOOT_BOOTLOADER_MAGIC;
132 use rand::rngs::OsRng;
133 use rand::RngCore;
134 use remain::sorted;
135 use resources::AddressRange;
136 use resources::SystemAllocator;
137 use resources::SystemAllocatorConfig;
138 use sync::Condvar;
139 use sync::Mutex;
140 use thiserror::Error;
141 use vm_control::BatControl;
142 use vm_control::BatteryType;
143 use vm_memory::GuestAddress;
144 use vm_memory::GuestMemory;
145 use vm_memory::GuestMemoryError;
146 use vm_memory::MemoryRegionOptions;
147 use vm_memory::MemoryRegionPurpose;
148 use zerocopy::FromBytes;
149 use zerocopy::Immutable;
150 use zerocopy::IntoBytes;
151 use zerocopy::KnownLayout;
152 
153 use crate::bootparam::boot_params;
154 use crate::bootparam::setup_header;
155 use crate::bootparam::XLF_CAN_BE_LOADED_ABOVE_4G;
156 use crate::cpuid::EDX_HYBRID_CPU_SHIFT;
157 
158 #[sorted]
159 #[derive(Error, Debug)]
160 pub enum Error {
161     #[error("error allocating a single gpe")]
162     AllocateGpe,
163     #[error("error allocating IO resource: {0}")]
164     AllocateIOResouce(resources::Error),
165     #[error("error allocating a single irq")]
166     AllocateIrq,
167     #[error("unable to clone an Event: {0}")]
168     CloneEvent(base::Error),
169     #[error("failed to clone IRQ chip: {0}")]
170     CloneIrqChip(base::Error),
171     #[cfg(any(target_os = "android", target_os = "linux"))]
172     #[error("failed to clone jail: {0}")]
173     CloneJail(minijail::Error),
174     #[error("unable to clone a Tube: {0}")]
175     CloneTube(TubeError),
176     #[error("the given kernel command line was invalid: {0}")]
177     Cmdline(kernel_cmdline::Error),
178     #[error("failed writing command line to guest memory")]
179     CommandLineCopy,
180     #[error("command line overflowed guest memory")]
181     CommandLineOverflow,
182     #[error("failed to configure hotplugged pci device: {0}")]
183     ConfigurePciDevice(arch::DeviceRegistrationError),
184     #[error("bad PCI ECAM configuration: {0}")]
185     ConfigurePciEcam(String),
186     #[error("bad PCI mem configuration: {0}")]
187     ConfigurePciMem(String),
188     #[error("failed to configure segment registers: {0}")]
189     ConfigureSegments(regs::Error),
190     #[error("error configuring the system")]
191     ConfigureSystem,
192     #[error("unable to create ACPI tables")]
193     CreateAcpi,
194     #[error("unable to create battery devices: {0}")]
195     CreateBatDevices(arch::DeviceRegistrationError),
196     #[error("could not create debugcon device: {0}")]
197     CreateDebugconDevice(devices::SerialError),
198     #[error("unable to make an Event: {0}")]
199     CreateEvent(base::Error),
200     #[error("failed to create fdt: {0}")]
201     CreateFdt(cros_fdt::Error),
202     #[error("failed to create fw_cfg device: {0}")]
203     CreateFwCfgDevice(devices::FwCfgError),
204     #[error("failed to create IOAPIC device: {0}")]
205     CreateIoapicDevice(base::Error),
206     #[error("failed to create a PCI root hub: {0}")]
207     CreatePciRoot(arch::DeviceRegistrationError),
208     #[error("unable to create PIT: {0}")]
209     CreatePit(base::Error),
210     #[error("unable to make PIT device: {0}")]
211     CreatePitDevice(devices::PitError),
212     #[cfg(any(target_os = "android", target_os = "linux"))]
213     #[error("unable to create proxy device: {0}")]
214     CreateProxyDevice(devices::ProxyError),
215     #[error("unable to create serial devices: {0}")]
216     CreateSerialDevices(arch::DeviceRegistrationError),
217     #[error("failed to create socket: {0}")]
218     CreateSocket(io::Error),
219     #[error("failed to create tube: {0}")]
220     CreateTube(base::TubeError),
221     #[error("failed to create VCPU: {0}")]
222     CreateVcpu(base::Error),
223     #[error("invalid e820 setup params")]
224     E820Configuration,
225     #[error("failed to enable singlestep execution: {0}")]
226     EnableSinglestep(base::Error),
227     #[error("failed to enable split irqchip: {0}")]
228     EnableSplitIrqchip(base::Error),
229     #[error("failed to get serial cmdline: {0}")]
230     GetSerialCmdline(GetSerialCmdlineError),
231     #[error("failed to insert device onto bus: {0}")]
232     InsertBus(devices::BusError),
233     #[error("the kernel extends past the end of RAM")]
234     InvalidCpuConfig,
235     #[error("invalid CPU config parameters")]
236     KernelOffsetPastEnd,
237     #[error("error loading bios: {0}")]
238     LoadBios(io::Error),
239     #[error("error loading kernel bzImage: {0}")]
240     LoadBzImage(bzimage::Error),
241     #[error("error loading custom pVM firmware: {0}")]
242     LoadCustomPvmFw(arch::LoadImageError),
243     #[error("error loading initrd: {0}")]
244     LoadInitrd(arch::LoadImageError),
245     #[error("error loading Kernel: {0}")]
246     LoadKernel(kernel_loader::Error),
247     #[error("error loading pflash: {0}")]
248     LoadPflash(io::Error),
249     #[error("error loading pVM firmware: {0}")]
250     LoadPvmFw(base::Error),
251     #[error("error in multiboot_info setup")]
252     MultibootInfoSetup,
253     #[error("error translating address: Page not present")]
254     PageNotPresent,
255     #[error("pci mmio overlaps with pVM firmware memory")]
256     PciMmioOverlapPvmFw,
257     #[error("pVM firmware not supported when bios is used on x86_64")]
258     PvmFwBiosUnsupported,
259     #[error("error reading guest memory {0}")]
260     ReadingGuestMemory(vm_memory::GuestMemoryError),
261     #[error("single register read not supported on x86_64")]
262     ReadRegIsUnsupported,
263     #[error("error reading CPU registers {0}")]
264     ReadRegs(base::Error),
265     #[error("error registering an IrqFd: {0}")]
266     RegisterIrqfd(base::Error),
267     #[error("error registering virtual socket device: {0}")]
268     RegisterVsock(arch::DeviceRegistrationError),
269     #[error("error reserved pcie config mmio")]
270     ReservePcieCfgMmio(resources::Error),
271     #[error("failed to set a hardware breakpoint: {0}")]
272     SetHwBreakpoint(base::Error),
273     #[error("failed to set identity map addr: {0}")]
274     SetIdentityMapAddr(base::Error),
275     #[error("failed to set interrupts: {0}")]
276     SetLint(interrupts::Error),
277     #[error("failed to set tss addr: {0}")]
278     SetTssAddr(base::Error),
279     #[error("failed to set up cmos: {0}")]
280     SetupCmos(anyhow::Error),
281     #[error("failed to set up cpuid: {0}")]
282     SetupCpuid(cpuid::Error),
283     #[error("setup data too large")]
284     SetupDataTooLarge,
285     #[error("failed to set up FPU: {0}")]
286     SetupFpu(base::Error),
287     #[error("failed to set up guest memory: {0}")]
288     SetupGuestMemory(GuestMemoryError),
289     #[error("failed to set up mptable: {0}")]
290     SetupMptable(mptable::Error),
291     #[error("failed to set up MSRs: {0}")]
292     SetupMsrs(base::Error),
293     #[error("failed to set up page tables: {0}")]
294     SetupPageTables(regs::Error),
295     #[error("failed to set up pflash: {0}")]
296     SetupPflash(anyhow::Error),
297     #[error("failed to set up registers: {0}")]
298     SetupRegs(regs::Error),
299     #[error("failed to set up SMBIOS: {0}")]
300     SetupSmbios(smbios::Error),
301     #[error("failed to set up sregs: {0}")]
302     SetupSregs(base::Error),
303     #[error("too many vCPUs")]
304     TooManyVcpus,
305     #[error("failed to translate virtual address")]
306     TranslatingVirtAddr,
307     #[error("protected VMs not supported on x86_64")]
308     UnsupportedProtectionType,
309     #[error("single register write not supported on x86_64")]
310     WriteRegIsUnsupported,
311     #[error("error writing CPU registers {0}")]
312     WriteRegs(base::Error),
313     #[error("error writing guest memory {0}")]
314     WritingGuestMemory(GuestMemoryError),
315     #[error("error writing setup_data: {0}")]
316     WritingSetupData(GuestMemoryError),
317     #[error("the zero page extends past the end of guest_mem")]
318     ZeroPagePastRamEnd,
319     #[error("error writing the zero page of guest memory")]
320     ZeroPageSetup,
321 }
322 
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
324 
/// Field-less type on which x86_64-specific associated functions are defined (for example
/// `X8664arch::get_pcie_vcfg_mmio_range`, used when building the e820 map).
pub struct X8664arch;
326 
// Like `bootparam::setup_data` without the incomplete array field at the end, which allows us to
// safely implement Copy, Clone.
//
// `write_setup_data` writes one of these headers immediately before each entry's payload.
#[repr(C)]
#[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
struct setup_data_hdr {
    // Guest physical address of the next entry's header; 0 terminates the list.
    pub next: u64,
    // Entry type, written as a `SetupDataType` discriminant.
    pub type_: u32,
    // Length in bytes of the payload that follows this header (the header itself is not counted).
    pub len: u32,
}
336 
/// Type tag of a [`SetupData`] entry.
///
/// The `u32` discriminants are the `SETUP_*` constants and are what gets written into the
/// `type_` field of the entry header.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SetupDataType {
    Dtb = SETUP_DTB,
    RngSeed = SETUP_RNG_SEED,
}
343 
/// A single entry to be inserted in the bootparam `setup_data` linked list.
pub struct SetupData {
    /// Raw payload bytes written to guest memory directly after the entry header.
    pub data: Vec<u8>,
    /// Type tag stored in the entry header.
    pub type_: SetupDataType,
}
349 
// Memory type of an e820 entry. The discriminant is cast to `u32` when the entry is copied into
// the boot params e820 table or into a Multiboot memory map entry.
#[derive(Copy, Clone, Debug)]
enum E820Type {
    Ram = 0x01,
    Reserved = 0x2,
}
355 
// One region of the guest physical memory map, as produced by `generate_e820_memory_map`.
#[derive(Copy, Clone, Debug)]
struct E820Entry {
    // Guest physical address of the first byte of the region.
    pub address: GuestAddress,
    // Length of the region in bytes.
    pub len: u64,
    // Whether the region is reported as RAM or as reserved.
    pub mem_type: E820Type,
}
362 
// Byte-size units.
const KB: u64 = 1 << 10;
const MB: u64 = 1 << 20;
const GB: u64 = 1 << 30;

pub const BOOT_STACK_POINTER: u64 = 0x8000;
// First address that does not fit in 32 bits (4 GiB).
const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32;
// Keep this aligned to 256MB for MTRR convenience.
const MEM_32BIT_GAP_SIZE: u64 = 768 * MB;
// Reserved memory (128MB) for nand_bios/LAPIC/IOAPIC/HPET/.....
const RESERVED_MEM_SIZE: u64 = 0x800_0000;
// Last address (inclusive) of the default PCI MMIO range below 4G.
const DEFAULT_PCI_MEM_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
// Reserve 64MB for pcie enhanced configuration
const DEFAULT_PCIE_CFG_MMIO_SIZE: u64 = 0x400_0000;
// Inclusive bounds of the default pcie enhanced configuration range.
const DEFAULT_PCIE_CFG_MMIO_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
const DEFAULT_PCIE_CFG_MMIO_START: u64 = DEFAULT_PCIE_CFG_MMIO_END - DEFAULT_PCIE_CFG_MMIO_SIZE + 1;
// Linux (with 4-level paging) has a physical memory limit of 46 bits (64 TiB).
const HIGH_MMIO_MAX_END: u64 = (1u64 << 46) - 1;
pub const KERNEL_32BIT_ENTRY_OFFSET: u64 = 0x0;
pub const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
// Guest physical address and size of the region that holds the Multiboot info structure and the
// extra Multiboot data appended after it.
pub const MULTIBOOT_INFO_OFFSET: u64 = 0x6000;
pub const MULTIBOOT_INFO_SIZE: u64 = 0x1000;
// Guest physical address at which the boot params ("zero page") are written.
pub const ZERO_PAGE_OFFSET: u64 = 0x7000;
// Set BIOS max size to 16M: this is used only when `unrestricted guest` is disabled
const BIOS_MAX_SIZE: u64 = 0x1000000;

pub const KERNEL_START_OFFSET: u64 = 0x20_0000;
const CMDLINE_OFFSET: u64 = 0x2_0000;
const CMDLINE_MAX_SIZE: u64 = 0x800; // including terminating zero
// The setup_data area starts right after the command line area and ends at the MP table.
const SETUP_DATA_START: u64 = CMDLINE_OFFSET + CMDLINE_MAX_SIZE;
const SETUP_DATA_END: u64 = MPTABLE_START;
const X86_64_SERIAL_1_3_IRQ: u32 = 4;
const X86_64_SERIAL_2_4_IRQ: u32 = 3;
// X86_64_SCI_IRQ is used to fill the ACPI FACP table.
// The SCI IRQ number should preferably be a legacy IRQ
// number, i.e. less than 16 (most platforms actually
// use the fixed IRQ number 9). So we reserve IRQ
// number 5 for SCI and let the other devices start
// from the next one.
pub const X86_64_SCI_IRQ: u32 = 5;
// The CMOS RTC uses IRQ 8; start allocating IRQs at 9.
pub const X86_64_IRQ_BASE: u32 = 9;
const ACPI_HI_RSDP_WINDOW_BASE: u64 = 0x000E_0000;

// pVM firmware memory. Should be within the low 4GB, so that it is identity-mapped
// by setup_page_tables() when a protected VM boots in long mode, since the pVM firmware is
// the VM entry point.
const PROTECTED_VM_FW_MAX_SIZE: u64 = 0x40_0000;
// Load the pVM firmware just below 2 GB to allow use of `-mcmodel=small`.
const PROTECTED_VM_FW_START: u64 = 0x8000_0000 - PROTECTED_VM_FW_MAX_SIZE;
412 
/// CPU vendor, as returned by [`get_cpu_manufacturer`].
#[derive(Debug, PartialEq, Eq)]
pub enum CpuManufacturer {
    Intel,
    Amd,
    /// Any vendor that is neither `Intel` nor `Amd`.
    Unknown,
}
419 
/// Returns the CPU manufacturer by delegating to `cpuid::cpu_manufacturer`.
pub fn get_cpu_manufacturer() -> CpuManufacturer {
    cpuid::cpu_manufacturer()
}
423 
/// Address ranges of the x86_64 guest physical memory layout, as computed by
/// [`create_arch_memory_layout`].
pub struct ArchMemoryLayout {
    // the pci mmio range below 4G
    pci_mmio_before_32bit: AddressRange,
    // the pcie cfg mmio range
    pcie_cfg_mmio: AddressRange,
    // the pVM firmware memory (if running a protected VM)
    pvmfw_mem: Option<AddressRange>,
}
432 
create_arch_memory_layout( pci_config: &PciConfig, has_protected_vm_firmware: bool, ) -> Result<ArchMemoryLayout>433 pub fn create_arch_memory_layout(
434     pci_config: &PciConfig,
435     has_protected_vm_firmware: bool,
436 ) -> Result<ArchMemoryLayout> {
437     // the max bus number is 256 and each bus occupy 1MB, so the max pcie cfg mmio size = 256M
438     const MAX_PCIE_ECAM_SIZE: u64 = 256 * MB;
439     let pcie_cfg_mmio = match pci_config.ecam {
440         Some(MemoryRegionConfig {
441             start,
442             size: Some(size),
443         }) => AddressRange::from_start_and_size(start, size.min(MAX_PCIE_ECAM_SIZE)).unwrap(),
444         Some(MemoryRegionConfig { start, size: None }) => {
445             AddressRange::from_start_and_end(start, DEFAULT_PCIE_CFG_MMIO_END)
446         }
447         None => {
448             AddressRange::from_start_and_end(DEFAULT_PCIE_CFG_MMIO_START, DEFAULT_PCIE_CFG_MMIO_END)
449         }
450     };
451     if pcie_cfg_mmio.start % pcie_cfg_mmio.len().unwrap() != 0
452         || pcie_cfg_mmio.start % MB != 0
453         || pcie_cfg_mmio.len().unwrap() % MB != 0
454     {
455         return Err(Error::ConfigurePciEcam(
456             "base and len must be aligned to 1MB and base must be a multiple of len".to_string(),
457         ));
458     }
459     if pcie_cfg_mmio.end >= 0x1_0000_0000 {
460         return Err(Error::ConfigurePciEcam(
461             "end address can't go beyond 4G".to_string(),
462         ));
463     }
464 
465     let pci_mmio_before_32bit = match pci_config.mem {
466         Some(MemoryRegionConfig {
467             start,
468             size: Some(size),
469         }) => AddressRange::from_start_and_size(start, size)
470             .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
471         Some(MemoryRegionConfig { start, size: None }) => {
472             AddressRange::from_start_and_end(start, DEFAULT_PCI_MEM_END)
473         }
474         None => AddressRange::from_start_and_end(
475             pcie_cfg_mmio
476                 .start
477                 .min(FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE),
478             DEFAULT_PCI_MEM_END,
479         ),
480     };
481 
482     let pvmfw_mem = if has_protected_vm_firmware {
483         let range = AddressRange {
484             start: PROTECTED_VM_FW_START,
485             end: PROTECTED_VM_FW_START + PROTECTED_VM_FW_MAX_SIZE - 1,
486         };
487         if !pci_mmio_before_32bit.intersect(range).is_empty() {
488             return Err(Error::PciMmioOverlapPvmFw);
489         }
490 
491         Some(range)
492     } else {
493         None
494     };
495 
496     Ok(ArchMemoryLayout {
497         pci_mmio_before_32bit,
498         pcie_cfg_mmio,
499         pvmfw_mem,
500     })
501 }
502 
503 /// The x86 reset vector for i386+ and x86_64 puts the processor into an "unreal mode" where it
504 /// can access the last 1 MB of the 32-bit address space in 16-bit mode, and starts the instruction
505 /// pointer at the effective physical address 0xFFFF_FFF0.
bios_start(bios_size: u64) -> GuestAddress506 fn bios_start(bios_size: u64) -> GuestAddress {
507     GuestAddress(FIRST_ADDR_PAST_32BITS - bios_size)
508 }
509 
identity_map_addr_start() -> GuestAddress510 fn identity_map_addr_start() -> GuestAddress {
511     // Set Identity map address 4 pages before the max BIOS size
512     GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_MAX_SIZE - 4 * 0x1000)
513 }
514 
tss_addr_start() -> GuestAddress515 fn tss_addr_start() -> GuestAddress {
516     // Set TSS address one page after identity map address
517     GuestAddress(identity_map_addr_start().offset() + 0x1000)
518 }
519 
tss_addr_end() -> GuestAddress520 fn tss_addr_end() -> GuestAddress {
521     // Set TSS address section to have 3 pages
522     GuestAddress(tss_addr_start().offset() + 0x3000)
523 }
524 
configure_boot_params( guest_mem: &GuestMemory, cmdline_addr: GuestAddress, setup_data: Option<GuestAddress>, initrd: Option<(GuestAddress, usize)>, mut params: boot_params, e820_entries: &[E820Entry], ) -> Result<()>525 fn configure_boot_params(
526     guest_mem: &GuestMemory,
527     cmdline_addr: GuestAddress,
528     setup_data: Option<GuestAddress>,
529     initrd: Option<(GuestAddress, usize)>,
530     mut params: boot_params,
531     e820_entries: &[E820Entry],
532 ) -> Result<()> {
533     const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
534     const KERNEL_HDR_MAGIC: u32 = 0x5372_6448;
535     const KERNEL_LOADER_OTHER: u8 = 0xff;
536     const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x100_0000; // Must be non-zero.
537 
538     params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
539     params.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
540     params.hdr.header = KERNEL_HDR_MAGIC;
541     params.hdr.cmd_line_ptr = cmdline_addr.offset() as u32;
542     params.ext_cmd_line_ptr = (cmdline_addr.offset() >> 32) as u32;
543     params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
544     if let Some(setup_data) = setup_data {
545         params.hdr.setup_data = setup_data.offset();
546     }
547     if let Some((initrd_addr, initrd_size)) = initrd {
548         params.hdr.ramdisk_image = initrd_addr.offset() as u32;
549         params.ext_ramdisk_image = (initrd_addr.offset() >> 32) as u32;
550         params.hdr.ramdisk_size = initrd_size as u32;
551         params.ext_ramdisk_size = (initrd_size as u64 >> 32) as u32;
552     }
553 
554     if e820_entries.len() >= params.e820_table.len() {
555         return Err(Error::E820Configuration);
556     }
557 
558     for (src, dst) in e820_entries.iter().zip(params.e820_table.iter_mut()) {
559         dst.addr = src.address.offset();
560         dst.size = src.len;
561         dst.type_ = src.mem_type as u32;
562     }
563     params.e820_entries = e820_entries.len() as u8;
564 
565     let zero_page_addr = GuestAddress(ZERO_PAGE_OFFSET);
566     if !guest_mem.is_valid_range(zero_page_addr, mem::size_of::<boot_params>() as u64) {
567         return Err(Error::ZeroPagePastRamEnd);
568     }
569 
570     guest_mem
571         .write_obj_at_addr(params, zero_page_addr)
572         .map_err(|_| Error::ZeroPageSetup)?;
573 
574     Ok(())
575 }
576 
configure_multiboot_info( guest_mem: &GuestMemory, cmdline_addr: GuestAddress, e820_entries: &[E820Entry], ) -> Result<()>577 fn configure_multiboot_info(
578     guest_mem: &GuestMemory,
579     cmdline_addr: GuestAddress,
580     e820_entries: &[E820Entry],
581 ) -> Result<()> {
582     let mut multiboot_info = MultibootInfo {
583         ..Default::default()
584     };
585 
586     // Extra Multiboot-related data is added directly after the info structure.
587     let mut multiboot_data_addr =
588         GuestAddress(MULTIBOOT_INFO_OFFSET + mem::size_of_val(&multiboot_info) as u64);
589     multiboot_data_addr = multiboot_data_addr
590         .align(16)
591         .ok_or(Error::MultibootInfoSetup)?;
592 
593     // mem_lower is the amount of RAM below 1 MB, in units of KiB.
594     let mem_lower = guest_mem
595         .regions()
596         .filter(|r| {
597             r.options.purpose == MemoryRegionPurpose::GuestMemoryRegion
598                 && r.guest_addr.offset() < 1 * MB
599         })
600         .map(|r| r.size as u64)
601         .sum::<u64>()
602         / KB;
603 
604     // mem_upper is the amount of RAM above 1 MB up to the first memory hole, in units of KiB.
605     // We don't have the ISA 15-16 MB hole, so this includes all RAM from 1 MB up to the
606     // beginning of the PCI hole just below 4 GB.
607     let mem_upper = guest_mem
608         .regions()
609         .filter(|r| {
610             r.options.purpose == MemoryRegionPurpose::GuestMemoryRegion
611                 && r.guest_addr.offset() >= 1 * MB
612                 && r.guest_addr.offset() < 4 * GB
613         })
614         .map(|r| r.size as u64)
615         .sum::<u64>()
616         / KB;
617 
618     multiboot_info.mem_lower = mem_lower as u32;
619     multiboot_info.mem_upper = mem_upper as u32;
620     multiboot_info.flags |= MultibootInfo::F_MEM;
621 
622     // Memory map - convert from params.e820_table to Multiboot format.
623     let multiboot_mmap: Vec<MultibootMmapEntry> = e820_entries
624         .iter()
625         .map(|e820_entry| MultibootMmapEntry {
626             size: 20, // size of the entry, not including the size field itself
627             base_addr: e820_entry.address.offset(),
628             length: e820_entry.len,
629             type_: e820_entry.mem_type as u32,
630         })
631         .collect();
632     let multiboot_mmap_bytes = multiboot_mmap.as_bytes();
633     let multiboot_mmap_addr =
634         append_multiboot_info(guest_mem, &mut multiboot_data_addr, multiboot_mmap_bytes)?;
635     multiboot_info.mmap_addr = multiboot_mmap_addr.offset() as u32;
636     multiboot_info.mmap_length = multiboot_mmap_bytes.len() as u32;
637     multiboot_info.flags |= MultibootInfo::F_MMAP;
638 
639     // Command line
640     multiboot_info.cmdline = cmdline_addr.offset() as u32;
641     multiboot_info.flags |= MultibootInfo::F_CMDLINE;
642 
643     // Boot loader name
644     let boot_loader_name_addr =
645         append_multiboot_info(guest_mem, &mut multiboot_data_addr, b"crosvm\0")?;
646     multiboot_info.boot_loader_name = boot_loader_name_addr.offset() as u32;
647     multiboot_info.flags |= MultibootInfo::F_BOOT_LOADER_NAME;
648 
649     guest_mem
650         .write_obj_at_addr(multiboot_info, GuestAddress(MULTIBOOT_INFO_OFFSET))
651         .map_err(|_| Error::MultibootInfoSetup)?;
652 
653     Ok(())
654 }
655 
append_multiboot_info( guest_mem: &GuestMemory, addr: &mut GuestAddress, data: &[u8], ) -> Result<GuestAddress>656 fn append_multiboot_info(
657     guest_mem: &GuestMemory,
658     addr: &mut GuestAddress,
659     data: &[u8],
660 ) -> Result<GuestAddress> {
661     let data_addr = *addr;
662     let new_addr = addr
663         .checked_add(data.len() as u64)
664         .and_then(|a| a.align(16))
665         .ok_or(Error::MultibootInfoSetup)?;
666 
667     // Make sure we don't write beyond the region reserved for Multiboot info.
668     if new_addr.offset() - MULTIBOOT_INFO_OFFSET > MULTIBOOT_INFO_SIZE {
669         return Err(Error::MultibootInfoSetup);
670     }
671 
672     guest_mem
673         .write_all_at_addr(data, data_addr)
674         .map_err(|_| Error::MultibootInfoSetup)?;
675 
676     *addr = new_addr;
677     Ok(data_addr)
678 }
679 
680 /// Write setup_data entries in guest memory and link them together with the `next` field.
681 ///
682 /// Returns the guest address of the first entry in the setup_data list, if any.
write_setup_data( guest_mem: &GuestMemory, setup_data_start: GuestAddress, setup_data_end: GuestAddress, setup_data: &[SetupData], ) -> Result<Option<GuestAddress>>683 fn write_setup_data(
684     guest_mem: &GuestMemory,
685     setup_data_start: GuestAddress,
686     setup_data_end: GuestAddress,
687     setup_data: &[SetupData],
688 ) -> Result<Option<GuestAddress>> {
689     let mut setup_data_list_head = None;
690 
691     // Place the first setup_data at the first 64-bit aligned offset following setup_data_start.
692     let mut setup_data_addr = setup_data_start.align(8).ok_or(Error::SetupDataTooLarge)?;
693 
694     let mut entry_iter = setup_data.iter().peekable();
695     while let Some(entry) = entry_iter.next() {
696         if setup_data_list_head.is_none() {
697             setup_data_list_head = Some(setup_data_addr);
698         }
699 
700         // Ensure the entry (header plus data) fits into guest memory.
701         let entry_size = (mem::size_of::<setup_data_hdr>() + entry.data.len()) as u64;
702         let entry_end = setup_data_addr
703             .checked_add(entry_size)
704             .ok_or(Error::SetupDataTooLarge)?;
705 
706         if entry_end >= setup_data_end {
707             return Err(Error::SetupDataTooLarge);
708         }
709 
710         let next_setup_data_addr = if entry_iter.peek().is_some() {
711             // Place the next setup_data at a 64-bit aligned address.
712             setup_data_addr
713                 .checked_add(entry_size)
714                 .and_then(|addr| addr.align(8))
715                 .ok_or(Error::SetupDataTooLarge)?
716         } else {
717             // This is the final entry. Terminate the list with next == 0.
718             GuestAddress(0)
719         };
720 
721         let hdr = setup_data_hdr {
722             next: next_setup_data_addr.offset(),
723             type_: entry.type_ as u32,
724             len: entry
725                 .data
726                 .len()
727                 .try_into()
728                 .map_err(|_| Error::SetupDataTooLarge)?,
729         };
730 
731         guest_mem
732             .write_obj_at_addr(hdr, setup_data_addr)
733             .map_err(Error::WritingSetupData)?;
734         guest_mem
735             .write_all_at_addr(
736                 &entry.data,
737                 setup_data_addr.unchecked_add(mem::size_of::<setup_data_hdr>() as u64),
738             )
739             .map_err(Error::WritingSetupData)?;
740 
741         setup_data_addr = next_setup_data_addr;
742     }
743 
744     Ok(setup_data_list_head)
745 }
746 
747 /// Generate a SETUP_RNG_SEED SetupData with random seed data.
setup_data_rng_seed() -> SetupData748 fn setup_data_rng_seed() -> SetupData {
749     let mut data = vec![0u8; 256];
750     OsRng.fill_bytes(&mut data);
751     SetupData {
752         data,
753         type_: SetupDataType::RngSeed,
754     }
755 }
756 
757 /// Add an e820 region to the e820 map.
add_e820_entry( e820_entries: &mut Vec<E820Entry>, range: AddressRange, mem_type: E820Type, ) -> Result<()>758 fn add_e820_entry(
759     e820_entries: &mut Vec<E820Entry>,
760     range: AddressRange,
761     mem_type: E820Type,
762 ) -> Result<()> {
763     e820_entries.push(E820Entry {
764         address: GuestAddress(range.start),
765         len: range.len().ok_or(Error::E820Configuration)?,
766         mem_type,
767     });
768 
769     Ok(())
770 }
771 
772 /// Generate a memory map in INT 0x15 AX=0xE820 format.
generate_e820_memory_map( arch_memory_layout: &ArchMemoryLayout, guest_mem: &GuestMemory, ) -> Result<Vec<E820Entry>>773 fn generate_e820_memory_map(
774     arch_memory_layout: &ArchMemoryLayout,
775     guest_mem: &GuestMemory,
776 ) -> Result<Vec<E820Entry>> {
777     let mut e820_entries = Vec::new();
778 
779     for r in guest_mem.regions() {
780         let range = AddressRange::from_start_and_size(r.guest_addr.offset(), r.size as u64)
781             .expect("invalid guest mem region");
782         let mem_type = match r.options.purpose {
783             MemoryRegionPurpose::Bios => E820Type::Reserved,
784             MemoryRegionPurpose::GuestMemoryRegion => E820Type::Ram,
785             // After the pVM firmware jumped to the guest, the pVM firmware itself is no longer
786             // running, so its memory is reusable by the guest OS. So add this memory as RAM rather
787             // than Reserved.
788             MemoryRegionPurpose::ProtectedFirmwareRegion => E820Type::Ram,
789             MemoryRegionPurpose::ReservedMemory => E820Type::Reserved,
790         };
791         add_e820_entry(&mut e820_entries, range, mem_type)?;
792     }
793 
794     let pcie_cfg_mmio_range = arch_memory_layout.pcie_cfg_mmio;
795     add_e820_entry(&mut e820_entries, pcie_cfg_mmio_range, E820Type::Reserved)?;
796 
797     add_e820_entry(
798         &mut e820_entries,
799         X8664arch::get_pcie_vcfg_mmio_range(guest_mem, &pcie_cfg_mmio_range),
800         E820Type::Reserved,
801     )?;
802 
803     // Reserve memory section for Identity map and TSS
804     add_e820_entry(
805         &mut e820_entries,
806         AddressRange {
807             start: identity_map_addr_start().offset(),
808             end: tss_addr_end().offset() - 1,
809         },
810         E820Type::Reserved,
811     )?;
812 
813     Ok(e820_entries)
814 }
815 
816 /// Returns a Vec of the valid memory addresses.
817 /// These should be used to configure the GuestMemory structure for the platform.
818 /// For x86_64 all addresses are valid from the start of the kernel except a
819 /// carve out at the end of 32bit address space.
arch_memory_regions( arch_memory_layout: &ArchMemoryLayout, mem_size: u64, bios_size: Option<u64>, ) -> Vec<(GuestAddress, u64, MemoryRegionOptions)>820 pub fn arch_memory_regions(
821     arch_memory_layout: &ArchMemoryLayout,
822     mem_size: u64,
823     bios_size: Option<u64>,
824 ) -> Vec<(GuestAddress, u64, MemoryRegionOptions)> {
825     let mut regions = Vec::new();
826 
827     // Some guest kernels expect a typical PC memory layout where the region between 640 KB and
828     // 1 MB is reserved for device memory/ROMs and get confused if there is a RAM region
829     // spanning this area, so we provide the traditional 640 KB low memory and 1 MB+
830     // high memory regions.
831     let mem_below_1m = 640 * KB;
832     regions.push((
833         GuestAddress(0),
834         mem_below_1m,
835         MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
836     ));
837 
838     // Reserved/BIOS data area between 640 KB and 1 MB.
839     // This needs to be backed by an actual GuestMemory region so we can write BIOS tables here, but
840     // it should be reported as "reserved" in the e820 memory map to match PC architecture
841     // expectations.
842     regions.push((
843         GuestAddress(640 * KB),
844         (1 * MB) - (640 * KB),
845         MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ReservedMemory),
846     ));
847 
848     // RAM between 1 MB and 4 GB
849     let mem_1m_to_4g = arch_memory_layout.pci_mmio_before_32bit.start.min(mem_size) - 1 * MB;
850     regions.push((
851         GuestAddress(1 * MB),
852         mem_1m_to_4g,
853         MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
854     ));
855 
856     // RAM above 4 GB
857     let mem_above_4g = mem_size.saturating_sub(1 * MB + mem_1m_to_4g);
858     if mem_above_4g > 0 {
859         regions.push((
860             GuestAddress(FIRST_ADDR_PAST_32BITS),
861             mem_above_4g,
862             MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
863         ));
864     }
865 
866     if let Some(bios_size) = bios_size {
867         regions.push((
868             bios_start(bios_size),
869             bios_size,
870             MemoryRegionOptions::new().purpose(MemoryRegionPurpose::Bios),
871         ));
872     }
873 
874     if let Some(pvmfw_mem) = arch_memory_layout.pvmfw_mem {
875         // Remove any areas of guest memory regions that overlap the pVM firmware range.
876         while let Some(overlapping_region_index) = regions.iter().position(|(addr, size, _opts)| {
877             let region_addr_range = AddressRange::from_start_and_size(addr.offset(), *size)
878                 .expect("invalid GuestMemory range");
879             region_addr_range.overlaps(pvmfw_mem)
880         }) {
881             let overlapping_region = regions.swap_remove(overlapping_region_index);
882             let overlapping_region_range = AddressRange::from_start_and_size(
883                 overlapping_region.0.offset(),
884                 overlapping_region.1,
885             )
886             .unwrap();
887             let (first, second) = overlapping_region_range.non_overlapping_ranges(pvmfw_mem);
888             if !first.is_empty() {
889                 regions.push((
890                     GuestAddress(first.start),
891                     first.len().unwrap(),
892                     overlapping_region.2.clone(),
893                 ));
894             }
895             if !second.is_empty() {
896                 regions.push((
897                     GuestAddress(second.start),
898                     second.len().unwrap(),
899                     overlapping_region.2,
900                 ));
901             }
902         }
903 
904         // Insert a region for the pVM firmware area.
905         regions.push((
906             GuestAddress(pvmfw_mem.start),
907             pvmfw_mem.len().expect("invalid pvmfw region"),
908             MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
909         ));
910     }
911 
912     regions.sort_unstable_by_key(|(addr, _, _)| *addr);
913 
914     for (addr, size, options) in &regions {
915         debug!(
916             "{:#018x}-{:#018x} {:?}",
917             addr.offset(),
918             addr.offset() + size - 1,
919             options.purpose,
920         );
921     }
922 
923     regions
924 }
925 
926 impl arch::LinuxArch for X8664arch {
927     type Error = Error;
928     type ArchMemoryLayout = ArchMemoryLayout;
929 
arch_memory_layout( components: &VmComponents, ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error>930     fn arch_memory_layout(
931         components: &VmComponents,
932     ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
933         create_arch_memory_layout(
934             &components.pci_config,
935             components.hv_cfg.protection_type.runs_firmware(),
936         )
937     }
938 
guest_memory_layout( components: &VmComponents, arch_memory_layout: &Self::ArchMemoryLayout, _hypervisor: &impl Hypervisor, ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>939     fn guest_memory_layout(
940         components: &VmComponents,
941         arch_memory_layout: &Self::ArchMemoryLayout,
942         _hypervisor: &impl Hypervisor,
943     ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
944         let bios_size = match &components.vm_image {
945             VmImage::Bios(bios_file) => Some(bios_file.metadata().map_err(Error::LoadBios)?.len()),
946             VmImage::Kernel(_) => None,
947         };
948 
949         Ok(arch_memory_regions(
950             arch_memory_layout,
951             components.memory_size,
952             bios_size,
953         ))
954     }
955 
get_system_allocator_config<V: Vm>( vm: &V, arch_memory_layout: &Self::ArchMemoryLayout, ) -> SystemAllocatorConfig956     fn get_system_allocator_config<V: Vm>(
957         vm: &V,
958         arch_memory_layout: &Self::ArchMemoryLayout,
959     ) -> SystemAllocatorConfig {
960         SystemAllocatorConfig {
961             io: Some(AddressRange {
962                 start: 0xc000,
963                 end: 0xffff,
964             }),
965             low_mmio: arch_memory_layout.pci_mmio_before_32bit,
966             high_mmio: Self::get_high_mmio_range(vm, arch_memory_layout),
967             platform_mmio: None,
968             first_irq: X86_64_IRQ_BASE,
969         }
970     }
971 
    /// Builds a runnable x86_64 Linux VM from `components`.
    ///
    /// In order, this function:
    ///
    /// 1. sets the identity map and TSS addresses on `vm` and reserves the PCIe config MMIO
    ///    window in `system_allocator`;
    /// 2. creates the PCI root from the PCI devices in `devs` and registers the PCI config
    ///    accessors on the I/O and MMIO buses;
    /// 3. sets up the optional fw_cfg, i8042, CMOS and pflash devices as well as the serial and
    ///    debugcon devices;
    /// 4. sets up the ACPI devices, then the MP table (unless a user-supplied FADT is present),
    ///    SMBIOS and ACPI tables;
    /// 5. builds the kernel command line and loads either the BIOS or the kernel (plus the pVM
    ///    firmware where the protection type requires it), initializing the registers and MSRs
    ///    in the per-vCPU init state;
    /// 6. returns the assembled `RunnableLinuxVm`.
    ///
    /// Entries of `devs` that are not PCI devices are not used. `_fdt_position` and `_no_pmu`
    /// are ignored.
    ///
    /// # Errors
    ///
    /// Returns the `Error` of the first step that fails. This includes `Error::TooManyVcpus`
    /// when `components.vcpu_count` is `u8::MAX` or more and `Error::PvmFwBiosUnsupported` when
    /// a BIOS image is combined with a protection type that runs firmware.
    ///
    /// # Panics
    ///
    /// Panics if inserting the PCI config devices on the buses fails, or if
    /// `components.pvm_fw` is `None` while the protection type needs the firmware loaded.
    fn build_vm<V, Vcpu>(
        mut components: VmComponents,
        arch_memory_layout: &Self::ArchMemoryLayout,
        vm_evt_wrtube: &SendTube,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        battery: (Option<BatteryType>, Option<Minijail>),
        mut vm: V,
        ramoops_region: Option<arch::pstore::RamoopsRegion>,
        devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: &mut dyn IrqChipX86_64,
        vcpu_ids: &mut Vec<usize>,
        dump_device_tree_blob: Option<PathBuf>,
        debugcon_jail: Option<Minijail>,
        pflash_jail: Option<Minijail>,
        fw_cfg_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
        guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
        device_tree_overlays: Vec<DtbOverlay>,
        _fdt_position: Option<FdtPosition>,
        _no_pmu: bool,
    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
    where
        V: VmX86_64,
        Vcpu: VcpuX86_64,
    {
        let mem = vm.get_memory().clone();

        let vcpu_count = components.vcpu_count;

        vm.set_identity_map_addr(identity_map_addr_start())
            .map_err(Error::SetIdentityMapAddr)?;

        vm.set_tss_addr(tss_addr_start())
            .map_err(Error::SetTssAddr)?;

        // Use IRQ info in ACPI if provided by the user.
        // Both defaults are overridden below when a user-supplied FADT ("FACP") is found.
        let mut mptable = true;
        let mut sci_irq = X86_64_SCI_IRQ;

        // punch pcie config mmio from pci low mmio, so that it couldn't be
        // allocated to any device.
        let pcie_cfg_mmio_range = arch_memory_layout.pcie_cfg_mmio;
        system_allocator
            .reserve_mmio(pcie_cfg_mmio_range)
            .map_err(Error::ReservePcieCfgMmio)?;

        for sdt in components.acpi_sdts.iter() {
            if sdt.is_signature(b"FACP") {
                // A user-supplied FADT disables the MP table and dictates the SCI interrupt.
                mptable = false;
                let sci_irq_fadt: u16 = sdt.read(acpi::FADT_FIELD_SCI_INTERRUPT);
                sci_irq = sci_irq_fadt.into();
                if !system_allocator.reserve_irq(sci_irq) {
                    warn!("sci irq {} already reserved.", sci_irq);
                }
            }
        }

        let pcie_vcfg_range = Self::get_pcie_vcfg_mmio_range(&mem, &pcie_cfg_mmio_range);
        let mmio_bus = Arc::new(Bus::new(BusType::Mmio));
        let io_bus = Arc::new(Bus::new(BusType::Io));

        // Only PCI devices are handled here; the remaining entries (`_devs`) are not used.
        let (pci_devices, _devs): (Vec<_>, Vec<_>) = devs
            .into_iter()
            .partition(|(dev, _)| dev.as_pci_device().is_some());

        // The `unwrap` cannot fail: the partition above kept only entries for which
        // `as_pci_device()` returned `Some`.
        let pci_devices = pci_devices
            .into_iter()
            .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
            .collect();

        let (pci, pci_irqs, pid_debug_label_map, amls, gpe_scope_amls) = arch::generate_pci_root(
            pci_devices,
            irq_chip.as_irq_chip_mut(),
            mmio_bus.clone(),
            GuestAddress(pcie_cfg_mmio_range.start),
            12,
            io_bus.clone(),
            system_allocator,
            &mut vm,
            4, // Share the four pin interrupts (INTx#)
            Some(pcie_vcfg_range.start),
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(Error::CreatePciRoot)?;

        let pci = Arc::new(Mutex::new(pci));
        pci.lock().enable_pcie_cfg_mmio(pcie_cfg_mmio_range.start);
        // PCI config access through I/O ports 0xcf8-0xcff.
        let pci_cfg = PciConfigIo::new(
            pci.clone(),
            components.break_linux_pci_config_io,
            vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
        );
        let pci_bus = Arc::new(Mutex::new(pci_cfg));
        io_bus.insert(pci_bus, 0xcf8, 0x8).unwrap();

        // PCI config access through the PCIe config MMIO window.
        let pcie_cfg_mmio = Arc::new(Mutex::new(PciConfigMmio::new(pci.clone(), 12)));
        let pcie_cfg_mmio_len = pcie_cfg_mmio_range.len().unwrap();
        mmio_bus
            .insert(pcie_cfg_mmio, pcie_cfg_mmio_range.start, pcie_cfg_mmio_len)
            .unwrap();

        // PCI config access through the PCIe virtual config MMIO window.
        let pcie_vcfg_mmio = Arc::new(Mutex::new(PciVirtualConfigMmio::new(pci.clone(), 13)));
        mmio_bus
            .insert(
                pcie_vcfg_mmio,
                pcie_vcfg_range.start,
                pcie_vcfg_range.len().unwrap(),
            )
            .unwrap();

        // Event used to notify crosvm that guest OS is trying to suspend.
        let (suspend_tube_send, suspend_tube_recv) =
            Tube::directional_pair().map_err(Error::CreateTube)?;
        let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));

        if components.fw_cfg_enable {
            Self::setup_fw_cfg_device(
                &io_bus,
                components.fw_cfg_parameters.clone(),
                components.bootorder_fw_cfg_blob.clone(),
                fw_cfg_jail,
                #[cfg(feature = "swap")]
                swap_controller,
            )?;
        }

        if !components.no_i8042 {
            Self::setup_legacy_i8042_device(
                &io_bus,
                irq_chip.pit_uses_speaker_port(),
                vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
            )?;
        }
        // When the CMOS device is created, the host end of its tube is kept so it can be
        // returned in `vm_request_tubes`.
        let mut vm_request_tube = if !components.no_rtc {
            let (host_tube, device_tube) = Tube::pair()
                .context("create tube")
                .map_err(Error::SetupCmos)?;
            Self::setup_legacy_cmos_device(
                arch_memory_layout,
                &io_bus,
                irq_chip,
                device_tube,
                components.memory_size,
            )
            .map_err(Error::SetupCmos)?;
            Some(host_tube)
        } else {
            None
        };
        let serial_devices = Self::setup_serial_devices(
            components.hv_cfg.protection_type,
            irq_chip.as_irq_chip_mut(),
            &io_bus,
            serial_parameters,
            serial_jail,
            #[cfg(feature = "swap")]
            swap_controller,
        )?;
        Self::setup_debugcon_devices(
            components.hv_cfg.protection_type,
            &io_bus,
            serial_parameters,
            debugcon_jail,
            #[cfg(feature = "swap")]
            swap_controller,
        )?;

        // A kernel image has no BIOS, which is represented here as a size of 0.
        let bios_size = if let VmImage::Bios(ref bios) = components.vm_image {
            bios.metadata().map_err(Error::LoadBios)?.len()
        } else {
            0
        };
        if let Some(pflash_image) = components.pflash_image {
            Self::setup_pflash(
                pflash_image,
                components.pflash_block_size,
                bios_size,
                &mmio_bus,
                pflash_jail,
                #[cfg(feature = "swap")]
                swap_controller,
            )?;
        }

        // Functions that use/create jails MUST be used before the call to
        // setup_acpi_devices below, as this moves us into a multiprocessing state
        // from which we can no longer fork.

        let mut resume_notify_devices = Vec::new();

        // each bus occupies 1MB mmio for pcie enhanced configuration
        let max_bus = (pcie_cfg_mmio_len / 0x100000 - 1) as u8;
        let (mut acpi_dev_resource, bat_control) = Self::setup_acpi_devices(
            arch_memory_layout,
            pci.clone(),
            &mem,
            &io_bus,
            system_allocator,
            suspend_tube_send.clone(),
            vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
            components.acpi_sdts,
            irq_chip.as_irq_chip_mut(),
            sci_irq,
            battery,
            &mmio_bus,
            max_bus,
            &mut resume_notify_devices,
            #[cfg(feature = "swap")]
            swap_controller,
            #[cfg(any(target_os = "android", target_os = "linux"))]
            components.ac_adapter,
            guest_suspended_cvar,
            &pci_irqs,
        )?;

        // Create customized SSDT table
        let sdt = acpi::create_customize_ssdt(pci.clone(), amls, gpe_scope_amls);
        if let Some(sdt) = sdt {
            acpi_dev_resource.sdts.push(sdt);
        }

        irq_chip
            .finalize_devices(system_allocator, &io_bus, &mmio_bus)
            .map_err(Error::RegisterIrqfd)?;

        // All of these bios generated tables are set manually for the benefit of the kernel boot
        // flow (since there's no BIOS to set it) and for the BIOS boot flow since crosvm doesn't
        // have a way to pass the BIOS these configs.
        // This works right now because the only guest BIOS used with crosvm (u-boot) ignores these
        // tables and the guest OS picks them up.
        // If another guest does need a way to pass these tables down to its BIOS, this approach
        // should be rethought.

        // Make sure the `vcpu_count` casts below and the arithmetic in `setup_mptable` are well
        // defined.
        if vcpu_count >= u8::MAX.into() {
            return Err(Error::TooManyVcpus);
        }

        if mptable {
            // Note that this puts the mptable at 0x9FC00 in guest physical memory.
            mptable::setup_mptable(&mem, vcpu_count as u8, &pci_irqs)
                .map_err(Error::SetupMptable)?;
        }
        smbios::setup_smbios(&mem, &components.smbios, bios_size).map_err(Error::SetupSmbios)?;

        // The vCPU affinity is only passed on to the ACPI tables when the host CPU topology
        // is in use.
        let host_cpus = if components.host_cpu_topology {
            components.vcpu_affinity.clone()
        } else {
            None
        };

        // TODO (tjeznach) Write RSDP to bootconfig before writing to memory
        acpi::create_acpi_tables(
            &mem,
            vcpu_count as u8,
            sci_irq,
            0xcf9,
            6, // RST_CPU|SYS_RST
            &acpi_dev_resource,
            host_cpus,
            vcpu_ids,
            &pci_irqs,
            pcie_cfg_mmio_range.start,
            max_bus,
            components.force_s2idle,
        )
        .ok_or(Error::CreateAcpi)?;

        // Kernel command line: base arguments, then serial console arguments, then the
        // user-supplied extra parameters, then the ramoops arguments (if any).
        let mut cmdline = Self::get_base_linux_cmdline();

        get_serial_cmdline(&mut cmdline, serial_parameters, "io", &serial_devices)
            .map_err(Error::GetSerialCmdline)?;

        for param in components.extra_kernel_params {
            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
        }

        if let Some(ramoops_region) = ramoops_region {
            arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
                .map_err(Error::Cmdline)?;
        }

        let pci_start = arch_memory_layout.pci_mmio_before_32bit.start;

        // One init state per vCPU; only entry 0 gets non-default registers below, while the
        // MSRs collected in `msrs` are copied to every vCPU at the end.
        let mut vcpu_init = vec![VcpuInitX86_64::default(); vcpu_count];
        let mut msrs = BTreeMap::new();

        let protection_type = components.hv_cfg.protection_type;

        match components.vm_image {
            VmImage::Bios(ref mut bios) => {
                if protection_type.runs_firmware() {
                    return Err(Error::PvmFwBiosUnsupported);
                }

                // Allow a bios to hardcode CMDLINE_OFFSET and read the kernel command line from it.
                Self::load_cmdline(
                    &mem,
                    GuestAddress(CMDLINE_OFFSET),
                    cmdline,
                    CMDLINE_MAX_SIZE as usize - 1,
                )?;
                Self::load_bios(&mem, bios)?;
                regs::set_default_msrs(&mut msrs);
                // The default values for `Regs` and `Sregs` already set up the reset vector.
            }
            VmImage::Kernel(ref mut kernel_image) => {
                let (params, kernel_region, kernel_entry, cpu_mode, kernel_type) =
                    Self::load_kernel(&mem, kernel_image)?;

                info!("Loaded {} kernel", kernel_type);

                Self::setup_system_memory(
                    arch_memory_layout,
                    &mem,
                    cmdline,
                    components.initrd_image,
                    components.android_fstab,
                    kernel_region,
                    params,
                    dump_device_tree_blob,
                    device_tree_overlays,
                    protection_type,
                )?;

                if protection_type.needs_firmware_loaded() {
                    // crosvm itself loads the supplied pVM firmware image into guest memory.
                    arch::load_image(
                        &mem,
                        &mut components
                            .pvm_fw
                            .expect("pvmfw must be available if ProtectionType loads it"),
                        GuestAddress(PROTECTED_VM_FW_START),
                        PROTECTED_VM_FW_MAX_SIZE,
                    )
                    .map_err(Error::LoadCustomPvmFw)?;
                } else if protection_type.runs_firmware() {
                    // Tell the hypervisor to load the pVM firmware.
                    vm.load_protected_vm_firmware(
                        GuestAddress(PROTECTED_VM_FW_START),
                        PROTECTED_VM_FW_MAX_SIZE,
                    )
                    .map_err(Error::LoadPvmFw)?;
                }

                // Entry point of the boot vCPU: the firmware when crosvm loaded it, the kernel
                // when no firmware runs, and left untouched when the hypervisor sets it.
                let entry_addr = if protection_type.needs_firmware_loaded() {
                    Some(PROTECTED_VM_FW_START)
                } else if protection_type.runs_firmware() {
                    None // Initial RIP value is set by the hypervisor
                } else {
                    Some(kernel_entry.offset())
                };

                if let Some(entry) = entry_addr {
                    vcpu_init[0].regs.rip = entry;
                }

                match kernel_type {
                    KernelType::BzImage | KernelType::Elf => {
                        // Configure the bootstrap VCPU for the Linux/x86 boot protocol.
                        // <https://www.kernel.org/doc/html/latest/x86/boot.html>
                        vcpu_init[0].regs.rsp = BOOT_STACK_POINTER;
                        vcpu_init[0].regs.rsi = ZERO_PAGE_OFFSET;
                    }
                    KernelType::Multiboot => {
                        // Provide Multiboot-compatible bootloader information.
                        vcpu_init[0].regs.rax = MULTIBOOT_BOOTLOADER_MAGIC.into();
                        vcpu_init[0].regs.rbx = MULTIBOOT_INFO_OFFSET;
                    }
                }

                if protection_type.runs_firmware() {
                    // Pass pVM payload entry address to pVM firmware.
                    // NOTE: this is only for development purposes. An actual pvmfw
                    // implementation should not use this value and should instead receive
                    // the pVM payload start and size info from crosvm as the DTB properties
                    // /config/kernel-address and /config/kernel-size and determine the offset
                    // of the entry point on its own, not trust crosvm to provide it.
                    vcpu_init[0].regs.rdi = kernel_entry.offset();
                }

                match cpu_mode {
                    CpuMode::LongMode => {
                        regs::set_long_mode_msrs(&mut msrs);

                        // Set up long mode and enable paging.
                        regs::configure_segments_and_sregs(&mem, &mut vcpu_init[0].sregs)
                            .map_err(Error::ConfigureSegments)?;
                        regs::setup_page_tables(&mem, &mut vcpu_init[0].sregs)
                            .map_err(Error::SetupPageTables)?;
                    }
                    CpuMode::FlatProtectedMode => {
                        regs::set_default_msrs(&mut msrs);

                        // Set up 32-bit protected mode with paging disabled.
                        regs::configure_segments_and_sregs_flat32(&mem, &mut vcpu_init[0].sregs)
                            .map_err(Error::ConfigureSegments)?;
                    }
                }

                regs::set_mtrr_msrs(&mut msrs, &vm, pci_start);
            }
        }

        // Initialize MSRs for all VCPUs.
        for vcpu in vcpu_init.iter_mut() {
            vcpu.msrs = msrs.clone();
        }

        // Contains the CMOS device's host tube, if that device was created above.
        let mut vm_request_tubes = Vec::new();
        if let Some(req_tube) = vm_request_tube.take() {
            vm_request_tubes.push(req_tube);
        }

        Ok(RunnableLinuxVm {
            vm,
            vcpu_count,
            vcpus: None,
            vcpu_affinity: components.vcpu_affinity,
            vcpu_init,
            no_smt: components.no_smt,
            irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
            io_bus,
            mmio_bus,
            pid_debug_label_map,
            suspend_tube: (suspend_tube_send, suspend_tube_recv),
            resume_notify_devices,
            rt_cpus: components.rt_cpus,
            delay_rt: components.delay_rt,
            bat_control,
            pm: Some(acpi_dev_resource.pm),
            root_config: pci,
            #[cfg(any(target_os = "android", target_os = "linux"))]
            platform_devices: Vec::new(),
            hotplug_bus: BTreeMap::new(),
            devices_thread: None,
            vm_request_tubes,
        })
    }
1414 
configure_vcpu<V: Vm>( vm: &V, hypervisor: &dyn HypervisorX86_64, irq_chip: &mut dyn IrqChipX86_64, vcpu: &mut dyn VcpuX86_64, vcpu_init: VcpuInitX86_64, vcpu_id: usize, num_cpus: usize, cpu_config: Option<CpuConfigX86_64>, ) -> Result<()>1415     fn configure_vcpu<V: Vm>(
1416         vm: &V,
1417         hypervisor: &dyn HypervisorX86_64,
1418         irq_chip: &mut dyn IrqChipX86_64,
1419         vcpu: &mut dyn VcpuX86_64,
1420         vcpu_init: VcpuInitX86_64,
1421         vcpu_id: usize,
1422         num_cpus: usize,
1423         cpu_config: Option<CpuConfigX86_64>,
1424     ) -> Result<()> {
1425         let cpu_config = match cpu_config {
1426             Some(config) => config,
1427             None => return Err(Error::InvalidCpuConfig),
1428         };
1429         if !vm.check_capability(VmCap::EarlyInitCpuid) {
1430             cpuid::setup_cpuid(hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, cpu_config)
1431                 .map_err(Error::SetupCpuid)?;
1432         }
1433 
1434         vcpu.set_regs(&vcpu_init.regs).map_err(Error::WriteRegs)?;
1435 
1436         vcpu.set_sregs(&vcpu_init.sregs)
1437             .map_err(Error::SetupSregs)?;
1438 
1439         vcpu.set_fpu(&vcpu_init.fpu).map_err(Error::SetupFpu)?;
1440 
1441         let vcpu_supported_var_mtrrs = regs::vcpu_supported_variable_mtrrs(vcpu);
1442         let num_var_mtrrs = regs::count_variable_mtrrs(&vcpu_init.msrs);
1443         let skip_mtrr_msrs = if num_var_mtrrs > vcpu_supported_var_mtrrs {
1444             warn!(
1445                 "Too many variable MTRR entries ({} required, {} supported),
1446                 please check pci_start addr, guest with pass through device may be very slow",
1447                 num_var_mtrrs, vcpu_supported_var_mtrrs,
1448             );
1449             // Filter out the MTRR entries from the MSR list.
1450             true
1451         } else {
1452             false
1453         };
1454 
1455         for (msr_index, value) in vcpu_init.msrs.into_iter() {
1456             if skip_mtrr_msrs && regs::is_mtrr_msr(msr_index) {
1457                 continue;
1458             }
1459 
1460             vcpu.set_msr(msr_index, value).map_err(Error::SetupMsrs)?;
1461         }
1462 
1463         interrupts::set_lint(vcpu_id, irq_chip).map_err(Error::SetLint)?;
1464 
1465         Ok(())
1466     }
1467 
register_pci_device<V: VmX86_64, Vcpu: VcpuX86_64>( linux: &mut RunnableLinuxVm<V, Vcpu>, device: Box<dyn PciDevice>, #[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>, resources: &mut SystemAllocator, hp_control_tube: &mpsc::Sender<PciRootCommand>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<PciAddress>1468     fn register_pci_device<V: VmX86_64, Vcpu: VcpuX86_64>(
1469         linux: &mut RunnableLinuxVm<V, Vcpu>,
1470         device: Box<dyn PciDevice>,
1471         #[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>,
1472         resources: &mut SystemAllocator,
1473         hp_control_tube: &mpsc::Sender<PciRootCommand>,
1474         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
1475     ) -> Result<PciAddress> {
1476         arch::configure_pci_device(
1477             linux,
1478             device,
1479             #[cfg(any(target_os = "android", target_os = "linux"))]
1480             minijail,
1481             resources,
1482             hp_control_tube,
1483             #[cfg(feature = "swap")]
1484             swap_controller,
1485         )
1486         .map_err(Error::ConfigurePciDevice)
1487     }
1488 
get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>>1489     fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
1490         Ok(BTreeMap::new())
1491     }
1492 
get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>>1493     fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
1494         Ok(BTreeMap::new())
1495     }
1496 
get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>>1497     fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
1498         Ok(BTreeMap::new())
1499     }
1500 
get_host_cpu_clusters() -> Result<Vec<CpuSet>>1501     fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
1502         Ok(Vec::new())
1503     }
1504 }
1505 
// Bit of the status dword (CDW1) returned by _OSC; the _OSC method emitted below sets it when
// the UUID passed in Arg0 is not the one it handles.
const OSC_STATUS_UNSUPPORT_UUID: u32 = 0x4;
// Bits of the control dword (CDW3) returned by the PCI host bridge _OSC.
// The _OSC method emitted below clears SHPC_HP and AER from CDW3; the bits marked `dead_code`
// are not referenced by that mask.
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_HP: u32 = 0x1;
const PCI_HB_OSC_CONTROL_SHPC_HP: u32 = 0x2;
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_PME: u32 = 0x4;
const PCI_HB_OSC_CONTROL_PCIE_AER: u32 = 0x8;
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_CAP: u32 = 0x10;
1517 
/// Field-less type whose `Aml` implementation emits the `_OSC` (Operating System Capabilities)
/// method.
// NOTE(review): judging by the name, this is the `_OSC` of the PCI root — confirm against the
// call site.
struct PciRootOSC {}
1519 
1520 // Method (_OSC, 4, NotSerialized)  // _OSC: Operating System Capabilities
1521 // {
1522 //     CreateDWordField (Arg3, Zero, CDW1)  // flag and return value
1523 //     If (Arg0 == ToUUID ("33db4d5b-1ff7-401c-9657-7441c03dd766"))
1524 //     {
1525 //         CreateDWordField (Arg3, 8, CDW3) // control field
1526 //         if ( 0 == (CDW1 & 0x01))  // Query flag ?
1527 //         {
1528 //              CDW3 &= !(SHPC_HP | AER)
1529 //         }
1530 //     } Else {
1531 //         CDW1 |= UNSUPPORT_UUID
1532 //     }
1533 //     Return (Arg3)
1534 // }
1535 impl Aml for PciRootOSC {
to_aml_bytes(&self, aml: &mut Vec<u8>)1536     fn to_aml_bytes(&self, aml: &mut Vec<u8>) {
1537         let osc_uuid = "33DB4D5B-1FF7-401C-9657-7441C03DD766";
1538         // virtual pcie root port supports hotplug, pme, and pcie cap register, clear all
1539         // the other bits.
1540         let mask = !(PCI_HB_OSC_CONTROL_SHPC_HP | PCI_HB_OSC_CONTROL_PCIE_AER);
1541         aml::Method::new(
1542             "_OSC".into(),
1543             4,
1544             false,
1545             vec![
1546                 &aml::CreateDWordField::new(
1547                     &aml::Name::new_field_name("CDW1"),
1548                     &aml::Arg(3),
1549                     &aml::ZERO,
1550                 ),
1551                 &aml::If::new(
1552                     &aml::Equal::new(&aml::Arg(0), &aml::Uuid::new(osc_uuid)),
1553                     vec![
1554                         &aml::CreateDWordField::new(
1555                             &aml::Name::new_field_name("CDW3"),
1556                             &aml::Arg(3),
1557                             &(8_u8),
1558                         ),
1559                         &aml::If::new(
1560                             &aml::Equal::new(
1561                                 &aml::ZERO,
1562                                 &aml::And::new(
1563                                     &aml::ZERO,
1564                                     &aml::Name::new_field_name("CDW1"),
1565                                     &aml::ONE,
1566                                 ),
1567                             ),
1568                             vec![&aml::And::new(
1569                                 &aml::Name::new_field_name("CDW3"),
1570                                 &mask,
1571                                 &aml::Name::new_field_name("CDW3"),
1572                             )],
1573                         ),
1574                     ],
1575                 ),
1576                 &aml::Else::new(vec![&aml::Or::new(
1577                     &aml::Name::new_field_name("CDW1"),
1578                     &OSC_STATUS_UNSUPPORT_UUID,
1579                     &aml::Name::new_field_name("CDW1"),
1580                 )]),
1581                 &aml::Return::new(&aml::Arg(3)),
1582             ],
1583         )
1584         .to_aml_bytes(aml)
1585     }
1586 }
1587 
/// CPU execution mode a loaded kernel expects at its entry point.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum CpuMode {
    /// 32-bit protected mode with paging disabled.
    FlatProtectedMode,

    /// 64-bit long mode.
    LongMode,
}
1595 
/// Image format of a guest kernel.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum KernelType {
    /// Linux bzImage.
    BzImage,
    /// ELF executable.
    Elf,
    /// Multiboot image.
    Multiboot,
}

impl fmt::Display for KernelType {
    /// Writes the human-readable name of the kernel format.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match *self {
            KernelType::BzImage => "bzImage",
            KernelType::Elf => "ELF",
            KernelType::Multiboot => "Multiboot",
        };
        f.write_str(label)
    }
}
1612 
1613 impl X8664arch {
1614     /// Loads the bios from an open file.
1615     ///
1616     /// # Arguments
1617     ///
1618     /// * `mem` - The memory to be used by the guest.
1619     /// * `bios_image` - the File object for the specified bios
load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()>1620     fn load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()> {
1621         let bios_image_length = bios_image.get_len().map_err(Error::LoadBios)?;
1622         if bios_image_length >= FIRST_ADDR_PAST_32BITS {
1623             return Err(Error::LoadBios(io::Error::new(
1624                 io::ErrorKind::InvalidData,
1625                 format!(
1626                     "bios was {} bytes, expected less than {}",
1627                     bios_image_length, FIRST_ADDR_PAST_32BITS,
1628                 ),
1629             )));
1630         }
1631 
1632         let guest_slice = mem
1633             .get_slice_at_addr(bios_start(bios_image_length), bios_image_length as usize)
1634             .map_err(Error::SetupGuestMemory)?;
1635         bios_image
1636             .read_exact_at_volatile(guest_slice, 0)
1637             .map_err(Error::LoadBios)?;
1638         Ok(())
1639     }
1640 
setup_pflash( pflash_image: File, block_size: u32, bios_size: u64, mmio_bus: &Bus, jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<()>1641     fn setup_pflash(
1642         pflash_image: File,
1643         block_size: u32,
1644         bios_size: u64,
1645         mmio_bus: &Bus,
1646         jail: Option<Minijail>,
1647         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
1648     ) -> Result<()> {
1649         let size = pflash_image.metadata().map_err(Error::LoadPflash)?.len();
1650         let start = FIRST_ADDR_PAST_32BITS - bios_size - size;
1651         let pflash_image = Box::new(pflash_image);
1652 
1653         #[cfg(any(target_os = "android", target_os = "linux"))]
1654         let fds = pflash_image.as_raw_descriptors();
1655 
1656         let pflash = Pflash::new(pflash_image, block_size).map_err(Error::SetupPflash)?;
1657         let pflash: Arc<Mutex<dyn BusDevice>> = match jail {
1658             #[cfg(any(target_os = "android", target_os = "linux"))]
1659             Some(jail) => Arc::new(Mutex::new(
1660                 ProxyDevice::new(
1661                     pflash,
1662                     jail,
1663                     fds,
1664                     #[cfg(feature = "swap")]
1665                     swap_controller,
1666                 )
1667                 .map_err(Error::CreateProxyDevice)?,
1668             )),
1669             #[cfg(windows)]
1670             Some(_) => unreachable!(),
1671             None => Arc::new(Mutex::new(pflash)),
1672         };
1673         mmio_bus
1674             .insert(pflash, start, size)
1675             .map_err(Error::InsertBus)?;
1676 
1677         Ok(())
1678     }
1679 
1680     /// Writes the command line string to the given memory slice.
1681     ///
1682     /// # Arguments
1683     ///
1684     /// * `guest_mem` - A u8 slice that will be partially overwritten by the command line.
1685     /// * `guest_addr` - The address in `guest_mem` at which to load the command line.
1686     /// * `cmdline` - The kernel command line.
1687     /// * `kernel_max_cmdline_len` - The maximum command line length (without NUL terminator)
1688     ///   supported by the kernel.
load_cmdline( guest_mem: &GuestMemory, guest_addr: GuestAddress, cmdline: kernel_cmdline::Cmdline, kernel_max_cmdline_len: usize, ) -> Result<()>1689     fn load_cmdline(
1690         guest_mem: &GuestMemory,
1691         guest_addr: GuestAddress,
1692         cmdline: kernel_cmdline::Cmdline,
1693         kernel_max_cmdline_len: usize,
1694     ) -> Result<()> {
1695         let mut cmdline_guest_mem_slice = guest_mem
1696             .get_slice_at_addr(guest_addr, CMDLINE_MAX_SIZE as usize)
1697             .map_err(|_| Error::CommandLineOverflow)?;
1698 
1699         let mut cmdline_bytes: Vec<u8> = cmdline
1700             .into_bytes_with_max_len(kernel_max_cmdline_len)
1701             .map_err(Error::Cmdline)?;
1702         cmdline_bytes.push(0u8); // Add NUL terminator.
1703 
1704         cmdline_guest_mem_slice
1705             .write_all(&cmdline_bytes)
1706             .map_err(|_| Error::CommandLineOverflow)?;
1707 
1708         Ok(())
1709     }
1710 
1711     /// Loads the kernel from an open file.
1712     ///
1713     /// # Arguments
1714     ///
1715     /// * `mem` - The memory to be used by the guest.
1716     /// * `kernel_image` - the File object for the specified kernel.
1717     ///
1718     /// # Returns
1719     ///
1720     /// On success, returns the Linux x86_64 boot protocol parameters, the address range containing
1721     /// the kernel, the entry point (initial `RIP` value), the initial CPU mode, and the type of
1722     /// kernel.
load_kernel( mem: &GuestMemory, kernel_image: &mut File, ) -> Result<(boot_params, AddressRange, GuestAddress, CpuMode, KernelType)>1723     fn load_kernel(
1724         mem: &GuestMemory,
1725         kernel_image: &mut File,
1726     ) -> Result<(boot_params, AddressRange, GuestAddress, CpuMode, KernelType)> {
1727         let kernel_start = GuestAddress(KERNEL_START_OFFSET);
1728 
1729         let multiboot =
1730             kernel_loader::multiboot_header_from_file(kernel_image).map_err(Error::LoadKernel)?;
1731 
1732         if let Some(multiboot_load) = multiboot.as_ref().and_then(|m| m.load.as_ref()) {
1733             let loaded_kernel = kernel_loader::load_multiboot(mem, kernel_image, multiboot_load)
1734                 .map_err(Error::LoadKernel)?;
1735 
1736             let boot_params = boot_params {
1737                 hdr: setup_header {
1738                     cmdline_size: CMDLINE_MAX_SIZE as u32 - 1,
1739                     ..Default::default()
1740                 },
1741                 ..Default::default()
1742             };
1743             return Ok((
1744                 boot_params,
1745                 loaded_kernel.address_range,
1746                 loaded_kernel.entry,
1747                 CpuMode::FlatProtectedMode,
1748                 KernelType::Multiboot,
1749             ));
1750         }
1751 
1752         match kernel_loader::load_elf64(mem, kernel_start, kernel_image, 0) {
1753             Ok(loaded_kernel) => {
1754                 // ELF kernels don't contain a `boot_params` structure, so synthesize a default one.
1755                 let boot_params = boot_params {
1756                     hdr: setup_header {
1757                         cmdline_size: CMDLINE_MAX_SIZE as u32 - 1,
1758                         ..Default::default()
1759                     },
1760                     ..Default::default()
1761                 };
1762                 Ok((
1763                     boot_params,
1764                     loaded_kernel.address_range,
1765                     loaded_kernel.entry,
1766                     CpuMode::LongMode,
1767                     KernelType::Elf,
1768                 ))
1769             }
1770             Err(kernel_loader::Error::InvalidMagicNumber) => {
1771                 // The image failed to parse as ELF, so try to load it as a bzImage.
1772                 let (boot_params, bzimage_region, bzimage_entry, cpu_mode) =
1773                     bzimage::load_bzimage(mem, kernel_start, kernel_image)
1774                         .map_err(Error::LoadBzImage)?;
1775                 Ok((
1776                     boot_params,
1777                     bzimage_region,
1778                     bzimage_entry,
1779                     cpu_mode,
1780                     KernelType::BzImage,
1781                 ))
1782             }
1783             Err(e) => Err(Error::LoadKernel(e)),
1784         }
1785     }
1786 
1787     /// Configures the system memory space should be called once per vm before
1788     /// starting vcpu threads.
1789     ///
1790     /// # Arguments
1791     ///
1792     /// * `mem` - The memory to be used by the guest.
1793     /// * `cmdline` - the kernel commandline
1794     /// * `initrd_file` - an initial ramdisk image
setup_system_memory( arch_memory_layout: &ArchMemoryLayout, mem: &GuestMemory, cmdline: kernel_cmdline::Cmdline, initrd_file: Option<File>, android_fstab: Option<File>, kernel_region: AddressRange, params: boot_params, dump_device_tree_blob: Option<PathBuf>, device_tree_overlays: Vec<DtbOverlay>, protection_type: ProtectionType, ) -> Result<()>1795     pub fn setup_system_memory(
1796         arch_memory_layout: &ArchMemoryLayout,
1797         mem: &GuestMemory,
1798         cmdline: kernel_cmdline::Cmdline,
1799         initrd_file: Option<File>,
1800         android_fstab: Option<File>,
1801         kernel_region: AddressRange,
1802         params: boot_params,
1803         dump_device_tree_blob: Option<PathBuf>,
1804         device_tree_overlays: Vec<DtbOverlay>,
1805         protection_type: ProtectionType,
1806     ) -> Result<()> {
1807         let e820_entries = generate_e820_memory_map(arch_memory_layout, mem)?;
1808 
1809         let kernel_max_cmdline_len = if params.hdr.cmdline_size == 0 {
1810             // Old kernels have a maximum length of 255 bytes, not including the NUL.
1811             255
1812         } else {
1813             params.hdr.cmdline_size as usize
1814         };
1815         debug!("kernel_max_cmdline_len={kernel_max_cmdline_len}");
1816         Self::load_cmdline(
1817             mem,
1818             GuestAddress(CMDLINE_OFFSET),
1819             cmdline,
1820             kernel_max_cmdline_len,
1821         )?;
1822 
1823         let initrd = match initrd_file {
1824             Some(mut initrd_file) => {
1825                 let initrd_addr_max = if params.hdr.xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G != 0 {
1826                     u64::MAX
1827                 } else if params.hdr.initrd_addr_max == 0 {
1828                     // Default initrd_addr_max for old kernels (see Documentation/x86/boot.txt).
1829                     0x37FFFFFF
1830                 } else {
1831                     u64::from(params.hdr.initrd_addr_max)
1832                 };
1833 
1834                 let (initrd_start, initrd_size) = arch::load_image_high(
1835                     mem,
1836                     &mut initrd_file,
1837                     GuestAddress(kernel_region.end + 1),
1838                     GuestAddress(initrd_addr_max),
1839                     Some(|region| {
1840                         region.options.purpose != MemoryRegionPurpose::ProtectedFirmwareRegion
1841                     }),
1842                     base::pagesize() as u64,
1843                 )
1844                 .map_err(Error::LoadInitrd)?;
1845                 Some((initrd_start, initrd_size))
1846             }
1847             None => None,
1848         };
1849 
1850         let mut setup_data = Vec::<SetupData>::new();
1851         if android_fstab.is_some()
1852             || !device_tree_overlays.is_empty()
1853             || protection_type.runs_firmware()
1854         {
1855             let device_tree_blob = fdt::create_fdt(
1856                 android_fstab,
1857                 dump_device_tree_blob,
1858                 device_tree_overlays,
1859                 kernel_region,
1860                 initrd,
1861             )
1862             .map_err(Error::CreateFdt)?;
1863             setup_data.push(SetupData {
1864                 data: device_tree_blob,
1865                 type_: SetupDataType::Dtb,
1866             });
1867         }
1868 
1869         setup_data.push(setup_data_rng_seed());
1870 
1871         let setup_data = write_setup_data(
1872             mem,
1873             GuestAddress(SETUP_DATA_START),
1874             GuestAddress(SETUP_DATA_END),
1875             &setup_data,
1876         )?;
1877 
1878         configure_boot_params(
1879             mem,
1880             GuestAddress(CMDLINE_OFFSET),
1881             setup_data,
1882             initrd,
1883             params,
1884             &e820_entries,
1885         )?;
1886 
1887         configure_multiboot_info(mem, GuestAddress(CMDLINE_OFFSET), &e820_entries)?;
1888 
1889         Ok(())
1890     }
1891 
get_pcie_vcfg_mmio_range(mem: &GuestMemory, pcie_cfg_mmio: &AddressRange) -> AddressRange1892     fn get_pcie_vcfg_mmio_range(mem: &GuestMemory, pcie_cfg_mmio: &AddressRange) -> AddressRange {
1893         // Put PCIe VCFG region at a 2MB boundary after physical memory or 4gb, whichever is
1894         // greater.
1895         let ram_end_round_2mb = mem.end_addr().offset().next_multiple_of(2 * MB);
1896         let start = std::cmp::max(ram_end_round_2mb, 4 * GB);
1897         // Each pci device's ECAM size is 4kb and its vcfg size is 8kb
1898         let end = start + pcie_cfg_mmio.len().unwrap() * 2 - 1;
1899         AddressRange { start, end }
1900     }
1901 
1902     /// Returns the high mmio range
get_high_mmio_range<V: Vm>(vm: &V, arch_memory_layout: &ArchMemoryLayout) -> AddressRange1903     fn get_high_mmio_range<V: Vm>(vm: &V, arch_memory_layout: &ArchMemoryLayout) -> AddressRange {
1904         let mem = vm.get_memory();
1905         let start = Self::get_pcie_vcfg_mmio_range(mem, &arch_memory_layout.pcie_cfg_mmio).end + 1;
1906 
1907         let phys_mem_end = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
1908         let high_mmio_end = std::cmp::min(phys_mem_end, HIGH_MMIO_MAX_END);
1909 
1910         AddressRange {
1911             start,
1912             end: high_mmio_end,
1913         }
1914     }
1915 
1916     /// This returns a minimal kernel command for this architecture
get_base_linux_cmdline() -> kernel_cmdline::Cmdline1917     pub fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
1918         let mut cmdline = kernel_cmdline::Cmdline::new();
1919         cmdline.insert_str("panic=-1").unwrap();
1920 
1921         cmdline
1922     }
1923 
1924     /// Sets up fw_cfg device.
1925     ///  # Arguments
1926     ///
1927     /// * `io_bus` - the IO bus object
1928     /// * `fw_cfg_parameters` - command-line specified data to add to device. May contain all None
1929     ///   fields if user did not specify data to add to the device
setup_fw_cfg_device( io_bus: &Bus, fw_cfg_parameters: Vec<FwCfgParameters>, bootorder_fw_cfg_blob: Vec<u8>, fw_cfg_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<()>1930     fn setup_fw_cfg_device(
1931         io_bus: &Bus,
1932         fw_cfg_parameters: Vec<FwCfgParameters>,
1933         bootorder_fw_cfg_blob: Vec<u8>,
1934         fw_cfg_jail: Option<Minijail>,
1935         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
1936     ) -> Result<()> {
1937         let fw_cfg = match devices::FwCfgDevice::new(FW_CFG_MAX_FILE_SLOTS, fw_cfg_parameters) {
1938             Ok(mut device) => {
1939                 // this condition will only be true if the user specified at least one bootindex
1940                 // option on the command line. If none were specified, bootorder_fw_cfg_blob will
1941                 // only have a null byte (null terminator)
1942                 if bootorder_fw_cfg_blob.len() > 1 {
1943                     // Add boot order file to the device. If the file is not present, firmware may
1944                     // not be able to boot.
1945                     if let Err(err) = device.add_file(
1946                         "bootorder",
1947                         bootorder_fw_cfg_blob,
1948                         devices::FwCfgItemType::GenericItem,
1949                     ) {
1950                         return Err(Error::CreateFwCfgDevice(err));
1951                     }
1952                 }
1953                 device
1954             }
1955             Err(err) => {
1956                 return Err(Error::CreateFwCfgDevice(err));
1957             }
1958         };
1959 
1960         let fw_cfg: Arc<Mutex<dyn BusDevice>> = match fw_cfg_jail.as_ref() {
1961             #[cfg(any(target_os = "android", target_os = "linux"))]
1962             Some(jail) => {
1963                 let jail_clone = jail.try_clone().map_err(Error::CloneJail)?;
1964                 #[cfg(feature = "seccomp_trace")]
1965                 debug!(
1966                     "seccomp_trace {{\"event\": \"minijail_clone\", \"src_jail_addr\": \"0x{:x}\", \"dst_jail_addr\": \"0x{:x}\"}}",
1967                     read_jail_addr(jail),
1968                     read_jail_addr(&jail_clone)
1969                 );
1970                 Arc::new(Mutex::new(
1971                     ProxyDevice::new(
1972                         fw_cfg,
1973                         jail_clone,
1974                         Vec::new(),
1975                         #[cfg(feature = "swap")]
1976                         swap_controller,
1977                     )
1978                     .map_err(Error::CreateProxyDevice)?,
1979                 ))
1980             }
1981             #[cfg(windows)]
1982             Some(_) => unreachable!(),
1983             None => Arc::new(Mutex::new(fw_cfg)),
1984         };
1985 
1986         io_bus
1987             .insert(fw_cfg, FW_CFG_BASE_PORT, FW_CFG_WIDTH)
1988             .map_err(Error::InsertBus)?;
1989 
1990         Ok(())
1991     }
1992 
1993     /// Sets up the legacy x86 i8042/KBD platform device
1994     ///
1995     /// # Arguments
1996     ///
1997     /// * - `io_bus` - the IO bus object
1998     /// * - `pit_uses_speaker_port` - does the PIT use port 0x61 for the PC speaker
1999     /// * - `vm_evt_wrtube` - the event object which should receive exit events
setup_legacy_i8042_device( io_bus: &Bus, pit_uses_speaker_port: bool, vm_evt_wrtube: SendTube, ) -> Result<()>2000     pub fn setup_legacy_i8042_device(
2001         io_bus: &Bus,
2002         pit_uses_speaker_port: bool,
2003         vm_evt_wrtube: SendTube,
2004     ) -> Result<()> {
2005         let i8042 = Arc::new(Mutex::new(devices::I8042Device::new(
2006             vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
2007         )));
2008 
2009         if pit_uses_speaker_port {
2010             io_bus.insert(i8042, 0x062, 0x3).unwrap();
2011         } else {
2012             io_bus.insert(i8042, 0x061, 0x4).unwrap();
2013         }
2014 
2015         Ok(())
2016     }
2017 
2018     /// Sets up the legacy x86 CMOS/RTC platform device
2019     /// # Arguments
2020     ///
2021     /// * - `io_bus` - the IO bus object
2022     /// * - `mem_size` - the size in bytes of physical ram for the guest
setup_legacy_cmos_device( arch_memory_layout: &ArchMemoryLayout, io_bus: &Bus, irq_chip: &mut dyn IrqChipX86_64, vm_control: Tube, mem_size: u64, ) -> anyhow::Result<()>2023     pub fn setup_legacy_cmos_device(
2024         arch_memory_layout: &ArchMemoryLayout,
2025         io_bus: &Bus,
2026         irq_chip: &mut dyn IrqChipX86_64,
2027         vm_control: Tube,
2028         mem_size: u64,
2029     ) -> anyhow::Result<()> {
2030         let mem_regions = arch_memory_regions(arch_memory_layout, mem_size, None);
2031 
2032         let mem_below_4g = mem_regions
2033             .iter()
2034             .filter(|r| r.0.offset() < FIRST_ADDR_PAST_32BITS)
2035             .map(|r| r.1)
2036             .sum();
2037 
2038         let mem_above_4g = mem_regions
2039             .iter()
2040             .filter(|r| r.0.offset() >= FIRST_ADDR_PAST_32BITS)
2041             .map(|r| r.1)
2042             .sum();
2043 
2044         let irq_evt = devices::IrqEdgeEvent::new().context("cmos irq")?;
2045         let cmos = devices::cmos::Cmos::new(
2046             mem_below_4g,
2047             mem_above_4g,
2048             Utc::now,
2049             vm_control,
2050             irq_evt.try_clone().context("cmos irq clone")?,
2051         )
2052         .context("create cmos")?;
2053 
2054         irq_chip
2055             .register_edge_irq_event(
2056                 devices::cmos::RTC_IRQ as u32,
2057                 &irq_evt,
2058                 IrqEventSource::from_device(&cmos),
2059             )
2060             .context("cmos register irq")?;
2061         io_bus
2062             .insert(Arc::new(Mutex::new(cmos)), 0x70, 0x2)
2063             .context("cmos insert irq")?;
2064 
2065         Ok(())
2066     }
2067 
2068     /// Sets up the acpi devices for this platform and
2069     /// return the resources which is used to set the ACPI tables.
2070     ///
2071     /// # Arguments
2072     ///
2073     /// * `io_bus` the I/O bus to add the devices to
2074     /// * `resources` the SystemAllocator to allocate IO and MMIO for acpi devices.
2075     /// * `suspend_tube` the tube object which used to suspend/resume the VM.
2076     /// * `sdts` ACPI system description tables
2077     /// * `irq_chip` the IrqChip object for registering irq events
2078     /// * `battery` indicate whether to create the battery
2079     /// * `mmio_bus` the MMIO bus to add the devices to
2080     /// * `pci_irqs` IRQ assignment of PCI devices. Tuples of (PCI address, gsi, PCI interrupt pin).
2081     ///   Note that this matches one of the return values of generate_pci_root.
setup_acpi_devices( arch_memory_layout: &ArchMemoryLayout, pci_root: Arc<Mutex<PciRoot>>, mem: &GuestMemory, io_bus: &Bus, resources: &mut SystemAllocator, suspend_tube: Arc<Mutex<SendTube>>, vm_evt_wrtube: SendTube, sdts: Vec<SDT>, irq_chip: &mut dyn IrqChip, sci_irq: u32, battery: (Option<BatteryType>, Option<Minijail>), #[cfg_attr(windows, allow(unused_variables))] mmio_bus: &Bus, max_bus: u8, resume_notify_devices: &mut Vec<Arc<Mutex<dyn BusResumeDevice>>>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, #[cfg(any(target_os = "android", target_os = "linux"))] ac_adapter: bool, guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>, pci_irqs: &[(PciAddress, u32, PciInterruptPin)], ) -> Result<(acpi::AcpiDevResource, Option<BatControl>)>2082     pub fn setup_acpi_devices(
2083         arch_memory_layout: &ArchMemoryLayout,
2084         pci_root: Arc<Mutex<PciRoot>>,
2085         mem: &GuestMemory,
2086         io_bus: &Bus,
2087         resources: &mut SystemAllocator,
2088         suspend_tube: Arc<Mutex<SendTube>>,
2089         vm_evt_wrtube: SendTube,
2090         sdts: Vec<SDT>,
2091         irq_chip: &mut dyn IrqChip,
2092         sci_irq: u32,
2093         battery: (Option<BatteryType>, Option<Minijail>),
2094         #[cfg_attr(windows, allow(unused_variables))] mmio_bus: &Bus,
2095         max_bus: u8,
2096         resume_notify_devices: &mut Vec<Arc<Mutex<dyn BusResumeDevice>>>,
2097         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
2098         #[cfg(any(target_os = "android", target_os = "linux"))] ac_adapter: bool,
2099         guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
2100         pci_irqs: &[(PciAddress, u32, PciInterruptPin)],
2101     ) -> Result<(acpi::AcpiDevResource, Option<BatControl>)> {
2102         // The AML data for the acpi devices
2103         let mut amls = Vec::new();
2104 
2105         let bat_control = if let Some(battery_type) = battery.0 {
2106             match battery_type {
2107                 #[cfg(any(target_os = "android", target_os = "linux"))]
2108                 BatteryType::Goldfish => {
2109                     let irq_num = resources.allocate_irq().ok_or(Error::CreateBatDevices(
2110                         arch::DeviceRegistrationError::AllocateIrq,
2111                     ))?;
2112                     let (control_tube, _mmio_base) = arch::sys::linux::add_goldfish_battery(
2113                         &mut amls,
2114                         battery.1,
2115                         mmio_bus,
2116                         irq_chip,
2117                         irq_num,
2118                         resources,
2119                         #[cfg(feature = "swap")]
2120                         swap_controller,
2121                     )
2122                     .map_err(Error::CreateBatDevices)?;
2123                     Some(BatControl {
2124                         type_: BatteryType::Goldfish,
2125                         control_tube,
2126                     })
2127                 }
2128                 #[cfg(windows)]
2129                 _ => None,
2130             }
2131         } else {
2132             None
2133         };
2134 
2135         let pm_alloc = resources.get_anon_alloc();
2136         let pm_iobase = match resources.io_allocator() {
2137             Some(io) => io
2138                 .allocate_with_align(
2139                     devices::acpi::ACPIPM_RESOURCE_LEN as u64,
2140                     pm_alloc,
2141                     "ACPIPM".to_string(),
2142                     4, // must be 32-bit aligned
2143                 )
2144                 .map_err(Error::AllocateIOResouce)?,
2145             None => 0x600,
2146         };
2147 
2148         let pcie_vcfg = aml::Name::new(
2149             "VCFG".into(),
2150             &Self::get_pcie_vcfg_mmio_range(mem, &arch_memory_layout.pcie_cfg_mmio).start,
2151         );
2152         pcie_vcfg.to_aml_bytes(&mut amls);
2153 
2154         let pm_sci_evt = devices::IrqLevelEvent::new().map_err(Error::CreateEvent)?;
2155 
2156         #[cfg(any(target_os = "android", target_os = "linux"))]
2157         let acdc = if ac_adapter {
2158             // Allocate GPE for AC adapter notfication
2159             let gpe = resources.allocate_gpe().ok_or(Error::AllocateGpe)?;
2160 
2161             let alloc = resources.get_anon_alloc();
2162             let mmio_base = resources
2163                 .allocate_mmio(
2164                     devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
2165                     alloc,
2166                     "AcAdapter".to_string(),
2167                     resources::AllocOptions::new().align(devices::ac_adapter::ACDC_VIRT_MMIO_SIZE),
2168                 )
2169                 .unwrap();
2170             let ac_adapter_dev = devices::ac_adapter::AcAdapter::new(mmio_base, gpe);
2171             let ac_dev = Arc::new(Mutex::new(ac_adapter_dev));
2172             mmio_bus
2173                 .insert(
2174                     ac_dev.clone(),
2175                     mmio_base,
2176                     devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
2177                 )
2178                 .unwrap();
2179 
2180             ac_dev.lock().to_aml_bytes(&mut amls);
2181             Some(ac_dev)
2182         } else {
2183             None
2184         };
2185         #[cfg(windows)]
2186         let acdc = None;
2187 
2188         //Virtual PMC
2189         if let Some(guest_suspended_cvar) = guest_suspended_cvar {
2190             let alloc = resources.get_anon_alloc();
2191             let mmio_base = resources
2192                 .allocate_mmio(
2193                     devices::pmc_virt::VPMC_VIRT_MMIO_SIZE,
2194                     alloc,
2195                     "VirtualPmc".to_string(),
2196                     resources::AllocOptions::new().align(devices::pmc_virt::VPMC_VIRT_MMIO_SIZE),
2197                 )
2198                 .unwrap();
2199 
2200             let pmc_virtio_mmio =
2201                 Arc::new(Mutex::new(VirtualPmc::new(mmio_base, guest_suspended_cvar)));
2202             mmio_bus
2203                 .insert(
2204                     pmc_virtio_mmio.clone(),
2205                     mmio_base,
2206                     devices::pmc_virt::VPMC_VIRT_MMIO_SIZE,
2207                 )
2208                 .unwrap();
2209             pmc_virtio_mmio.lock().to_aml_bytes(&mut amls);
2210         }
2211 
2212         let mut pmresource = devices::ACPIPMResource::new(
2213             pm_sci_evt.try_clone().map_err(Error::CloneEvent)?,
2214             suspend_tube,
2215             vm_evt_wrtube,
2216             acdc,
2217         );
2218         pmresource.to_aml_bytes(&mut amls);
2219         irq_chip
2220             .register_level_irq_event(
2221                 sci_irq,
2222                 &pm_sci_evt,
2223                 IrqEventSource::from_device(&pmresource),
2224             )
2225             .map_err(Error::RegisterIrqfd)?;
2226         pmresource.start();
2227 
2228         let mut crs_entries: Vec<Box<dyn Aml>> = vec![
2229             Box::new(aml::AddressSpace::new_bus_number(0x0u16, max_bus as u16)),
2230             Box::new(aml::IO::new(0xcf8, 0xcf8, 1, 0x8)),
2231         ];
2232         for r in resources.mmio_pools() {
2233             let entry: Box<dyn Aml> = match (u32::try_from(r.start), u32::try_from(r.end)) {
2234                 (Ok(start), Ok(end)) => Box::new(aml::AddressSpace::new_memory(
2235                     aml::AddressSpaceCachable::NotCacheable,
2236                     true,
2237                     start,
2238                     end,
2239                 )),
2240                 _ => Box::new(aml::AddressSpace::new_memory(
2241                     aml::AddressSpaceCachable::NotCacheable,
2242                     true,
2243                     r.start,
2244                     r.end,
2245                 )),
2246             };
2247             crs_entries.push(entry);
2248         }
2249 
2250         let prt_entries: Vec<aml::Package> = pci_irqs
2251             .iter()
2252             .map(|(pci_address, gsi, pci_intr_pin)| {
2253                 aml::Package::new(vec![
2254                     &pci_address.acpi_adr(),
2255                     &pci_intr_pin.to_mask(),
2256                     &aml::ZERO,
2257                     gsi,
2258                 ])
2259             })
2260             .collect();
2261 
2262         aml::Device::new(
2263             "_SB_.PC00".into(),
2264             vec![
2265                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A08")),
2266                 &aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A03")),
2267                 &aml::Name::new("_ADR".into(), &aml::ZERO),
2268                 &aml::Name::new("_SEG".into(), &aml::ZERO),
2269                 &aml::Name::new("_UID".into(), &aml::ZERO),
2270                 &aml::Name::new("SUPP".into(), &aml::ZERO),
2271                 &aml::Name::new(
2272                     "_CRS".into(),
2273                     &aml::ResourceTemplate::new(crs_entries.iter().map(|b| b.as_ref()).collect()),
2274                 ),
2275                 &PciRootOSC {},
2276                 &aml::Name::new(
2277                     "_PRT".into(),
2278                     &aml::Package::new(prt_entries.iter().map(|p| p as &dyn Aml).collect()),
2279                 ),
2280             ],
2281         )
2282         .to_aml_bytes(&mut amls);
2283 
2284         if let (Some(start), Some(len)) = (
2285             u32::try_from(arch_memory_layout.pcie_cfg_mmio.start).ok(),
2286             arch_memory_layout
2287                 .pcie_cfg_mmio
2288                 .len()
2289                 .and_then(|l| u32::try_from(l).ok()),
2290         ) {
2291             aml::Device::new(
2292                 "_SB_.MB00".into(),
2293                 vec![
2294                     &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
2295                     &aml::Name::new(
2296                         "_CRS".into(),
2297                         &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
2298                             true, start, len,
2299                         )]),
2300                     ),
2301                 ],
2302             )
2303             .to_aml_bytes(&mut amls);
2304         } else {
2305             warn!("Failed to create ACPI MMCFG region reservation");
2306         }
2307 
2308         let root_bus = pci_root.lock().get_root_bus();
2309         let addresses = root_bus.lock().get_downstream_devices();
2310         for address in addresses {
2311             if let Some(acpi_path) = pci_root.lock().acpi_path(&address) {
2312                 const DEEPEST_SLEEP_STATE: u32 = 3;
2313                 aml::Device::new(
2314                     (*acpi_path).into(),
2315                     vec![
2316                         &aml::Name::new("_ADR".into(), &address.acpi_adr()),
2317                         &aml::Name::new(
2318                             "_PRW".into(),
2319                             &aml::Package::new(vec![&PM_WAKEUP_GPIO, &DEEPEST_SLEEP_STATE]),
2320                         ),
2321                     ],
2322                 )
2323                 .to_aml_bytes(&mut amls);
2324             }
2325         }
2326 
2327         let pm = Arc::new(Mutex::new(pmresource));
2328         io_bus
2329             .insert(
2330                 pm.clone(),
2331                 pm_iobase,
2332                 devices::acpi::ACPIPM_RESOURCE_LEN as u64,
2333             )
2334             .unwrap();
2335         resume_notify_devices.push(pm.clone());
2336 
2337         Ok((
2338             acpi::AcpiDevResource {
2339                 amls,
2340                 pm_iobase,
2341                 pm,
2342                 sdts,
2343             },
2344             bat_control,
2345         ))
2346     }
2347 
2348     /// Sets up the serial devices for this platform. Returns a list of configured serial devices.
2349     ///
2350     /// # Arguments
2351     ///
2352     /// * - `irq_chip` the IrqChip object for registering irq events
2353     /// * - `io_bus` the I/O bus to add the devices to
2354     /// * - `serial_parameters` - definitions for how the serial devices should be configured
setup_serial_devices( protection_type: ProtectionType, irq_chip: &mut dyn IrqChip, io_bus: &Bus, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<Vec<SerialDeviceInfo>>2355     pub fn setup_serial_devices(
2356         protection_type: ProtectionType,
2357         irq_chip: &mut dyn IrqChip,
2358         io_bus: &Bus,
2359         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
2360         serial_jail: Option<Minijail>,
2361         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
2362     ) -> Result<Vec<SerialDeviceInfo>> {
2363         let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2364         let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2365 
2366         let serial_devices = arch::add_serial_devices(
2367             protection_type,
2368             io_bus,
2369             (X86_64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
2370             (X86_64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
2371             serial_parameters,
2372             serial_jail,
2373             #[cfg(feature = "swap")]
2374             swap_controller,
2375         )
2376         .map_err(Error::CreateSerialDevices)?;
2377 
2378         let source = IrqEventSource {
2379             device_id: Serial::device_id(),
2380             queue_id: 0,
2381             device_name: Serial::debug_label(),
2382         };
2383         irq_chip
2384             .register_edge_irq_event(X86_64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
2385             .map_err(Error::RegisterIrqfd)?;
2386         irq_chip
2387             .register_edge_irq_event(X86_64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
2388             .map_err(Error::RegisterIrqfd)?;
2389 
2390         Ok(serial_devices)
2391     }
2392 
setup_debugcon_devices( protection_type: ProtectionType, io_bus: &Bus, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, debugcon_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<()>2393     fn setup_debugcon_devices(
2394         protection_type: ProtectionType,
2395         io_bus: &Bus,
2396         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
2397         debugcon_jail: Option<Minijail>,
2398         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
2399     ) -> Result<()> {
2400         for param in serial_parameters.values() {
2401             if param.hardware != SerialHardware::Debugcon {
2402                 continue;
2403             }
2404 
2405             let mut preserved_fds = Vec::new();
2406             let con = param
2407                 .create_serial_device::<Debugcon>(
2408                     protection_type,
2409                     // Debugcon doesn't use the interrupt event
2410                     &Event::new().map_err(Error::CreateEvent)?,
2411                     &mut preserved_fds,
2412                 )
2413                 .map_err(Error::CreateDebugconDevice)?;
2414 
2415             let con: Arc<Mutex<dyn BusDevice>> = match debugcon_jail.as_ref() {
2416                 #[cfg(any(target_os = "android", target_os = "linux"))]
2417                 Some(jail) => {
2418                     let jail_clone = jail.try_clone().map_err(Error::CloneJail)?;
2419                     #[cfg(feature = "seccomp_trace")]
2420                     debug!(
2421                         "seccomp_trace {{\"event\": \"minijail_clone\", \"src_jail_addr\": \"0x{:x}\", \"dst_jail_addr\": \"0x{:x}\"}}",
2422                         read_jail_addr(jail),
2423                         read_jail_addr(&jail_clone)
2424                     );
2425                     Arc::new(Mutex::new(
2426                         ProxyDevice::new(
2427                             con,
2428                             jail_clone,
2429                             preserved_fds,
2430                             #[cfg(feature = "swap")]
2431                             swap_controller,
2432                         )
2433                         .map_err(Error::CreateProxyDevice)?,
2434                     ))
2435                 }
2436                 #[cfg(windows)]
2437                 Some(_) => unreachable!(),
2438                 None => Arc::new(Mutex::new(con)),
2439             };
2440             io_bus
2441                 .insert(con.clone(), param.debugcon_port.into(), 1)
2442                 .map_err(Error::InsertBus)?;
2443         }
2444 
2445         Ok(())
2446     }
2447 }
2448 
/// Errors concerning MSR handling.
///
/// NOTE(review): the code producing these errors is outside this section; the descriptions
/// below are derived from the error messages only.
#[sorted]
#[derive(Error, Debug)]
pub enum MsrError {
    /// The CPU is not supported; per the message, only Intel CPUs support ITMT.
    #[error("CPU not support. Only intel CPUs support ITMT.")]
    CpuUnSupport,
    /// An MSR was not unique. The `u32` payload is interpolated into the message
    /// (presumably the MSR index - confirm against callers).
    #[error("msr must be unique: {0}")]
    MsrDuplicate(u32),
}
2457 
/// Error type returned by `check_host_hybrid_support`.
#[derive(Error, Debug)]
pub enum HybridSupportError {
    /// One of the host CPUID checks for hybrid architecture support failed.
    #[error("Host CPU doesn't support hybrid architecture.")]
    UnsupportedHostCpu,
}
2463 
/// Bundles the two CPUID query functions behind plain function pointers so that tests can
/// substitute fake implementations.
pub struct CpuIdCall {
    /// `__cpuid_count` or a fake function for test; called with two `u32` arguments.
    cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
    /// `__cpuid` or a fake function for test; called with a single `u32` argument.
    cpuid: unsafe fn(u32) -> CpuidResult,
}

impl CpuIdCall {
    /// Wraps the given `cpuid_count` and `cpuid` function pointers.
    pub fn new(
        cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
        cpuid: unsafe fn(u32) -> CpuidResult,
    ) -> Self {
        Self { cpuid, cpuid_count }
    }
}
2480 
2481 /// Check if host supports hybrid CPU feature. The check include:
2482 ///     1. Check if CPUID.1AH exists. CPUID.1AH is hybrid information enumeration leaf.
2483 ///     2. Check if CPUID.07H.00H:EDX[bit 15] sets. This bit means the processor is identified as a
2484 ///        hybrid part.
2485 ///     3. Check if CPUID.1AH:EAX sets. The hybrid core type is set in EAX.
2486 ///
2487 /// # Arguments
2488 ///
2489 /// * - `cpuid` the wrapped cpuid functions used to get CPUID info.
check_host_hybrid_support(cpuid: &CpuIdCall) -> std::result::Result<(), HybridSupportError>2490 pub fn check_host_hybrid_support(cpuid: &CpuIdCall) -> std::result::Result<(), HybridSupportError> {
2491     // CPUID.0H.EAX returns maximum input value for basic CPUID information.
2492     //
2493     // SAFETY:
2494     // Safe because we pass 0 for this call and the host supports the
2495     // `cpuid` instruction.
2496     let mut cpuid_entry = unsafe { (cpuid.cpuid)(0x0) };
2497     if cpuid_entry.eax < 0x1A {
2498         return Err(HybridSupportError::UnsupportedHostCpu);
2499     }
2500     // SAFETY:
2501     // Safe because we pass 0x7 and 0 for this call and the host supports the
2502     // `cpuid` instruction.
2503     cpuid_entry = unsafe { (cpuid.cpuid_count)(0x7, 0) };
2504     if cpuid_entry.edx & 1 << EDX_HYBRID_CPU_SHIFT == 0 {
2505         return Err(HybridSupportError::UnsupportedHostCpu);
2506     }
2507     // From SDM, if a value entered for CPUID.EAX is less than or equal to the
2508     // maximum input value and the leaf is not supported on that processor then
2509     // 0 is returned in all the registers.
2510     // For the CPU with hybrid support, its CPUID.1AH.EAX shouldn't be zero.
2511     //
2512     // SAFETY:
2513     // Safe because we pass 0 for this call and the host supports the
2514     // `cpuid` instruction.
2515     cpuid_entry = unsafe { (cpuid.cpuid)(0x1A) };
2516     if cpuid_entry.eax == 0 {
2517         return Err(HybridSupportError::UnsupportedHostCpu);
2518     }
2519     Ok(())
2520 }
2521 
2522 #[cfg(test)]
2523 mod tests {
2524     use std::mem::size_of;
2525 
2526     use super::*;
2527 
setup() -> ArchMemoryLayout2528     fn setup() -> ArchMemoryLayout {
2529         let pci_config = PciConfig {
2530             ecam: Some(MemoryRegionConfig {
2531                 start: 3 * GB,
2532                 size: Some(256 * MB),
2533             }),
2534             mem: Some(MemoryRegionConfig {
2535                 start: 2 * GB,
2536                 size: None,
2537             }),
2538         };
2539         create_arch_memory_layout(&pci_config, false).unwrap()
2540     }
2541 
2542     #[test]
regions_lt_4gb_nobios()2543     fn regions_lt_4gb_nobios() {
2544         let arch_memory_layout = setup();
2545         let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, /* bios_size */ None);
2546         assert_eq!(
2547             regions,
2548             [
2549                 (
2550                     GuestAddress(0),
2551                     640 * KB,
2552                     MemoryRegionOptions {
2553                         align: 0,
2554                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2555                         file_backed: None,
2556                     },
2557                 ),
2558                 (
2559                     GuestAddress(640 * KB),
2560                     384 * KB,
2561                     MemoryRegionOptions {
2562                         align: 0,
2563                         purpose: MemoryRegionPurpose::ReservedMemory,
2564                         file_backed: None,
2565                     },
2566                 ),
2567                 (
2568                     GuestAddress(1 * MB),
2569                     512 * MB - 1 * MB,
2570                     MemoryRegionOptions {
2571                         align: 0,
2572                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2573                         file_backed: None,
2574                     },
2575                 )
2576             ]
2577         );
2578     }
2579 
2580     #[test]
regions_gt_4gb_nobios()2581     fn regions_gt_4gb_nobios() {
2582         let arch_memory_layout = setup();
2583         let size = 4 * GB + 0x8000;
2584         let regions = arch_memory_regions(&arch_memory_layout, size, /* bios_size */ None);
2585         assert_eq!(
2586             regions,
2587             [
2588                 (
2589                     GuestAddress(0),
2590                     640 * KB,
2591                     MemoryRegionOptions {
2592                         align: 0,
2593                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2594                         file_backed: None,
2595                     },
2596                 ),
2597                 (
2598                     GuestAddress(640 * KB),
2599                     384 * KB,
2600                     MemoryRegionOptions {
2601                         align: 0,
2602                         purpose: MemoryRegionPurpose::ReservedMemory,
2603                         file_backed: None,
2604                     },
2605                 ),
2606                 (
2607                     GuestAddress(1 * MB),
2608                     2 * GB - 1 * MB,
2609                     MemoryRegionOptions {
2610                         align: 0,
2611                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2612                         file_backed: None,
2613                     },
2614                 ),
2615                 (
2616                     GuestAddress(4 * GB),
2617                     2 * GB + 0x8000,
2618                     MemoryRegionOptions {
2619                         align: 0,
2620                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2621                         file_backed: None,
2622                     },
2623                 ),
2624             ]
2625         );
2626     }
2627 
2628     #[test]
regions_lt_4gb_bios()2629     fn regions_lt_4gb_bios() {
2630         let arch_memory_layout = setup();
2631         let bios_len = 1 * MB;
2632         let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, Some(bios_len));
2633         assert_eq!(
2634             regions,
2635             [
2636                 (
2637                     GuestAddress(0),
2638                     640 * KB,
2639                     MemoryRegionOptions {
2640                         align: 0,
2641                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2642                         file_backed: None,
2643                     },
2644                 ),
2645                 (
2646                     GuestAddress(640 * KB),
2647                     384 * KB,
2648                     MemoryRegionOptions {
2649                         align: 0,
2650                         purpose: MemoryRegionPurpose::ReservedMemory,
2651                         file_backed: None,
2652                     },
2653                 ),
2654                 (
2655                     GuestAddress(1 * MB),
2656                     512 * MB - 1 * MB,
2657                     MemoryRegionOptions {
2658                         align: 0,
2659                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2660                         file_backed: None,
2661                     },
2662                 ),
2663                 (
2664                     GuestAddress(4 * GB - bios_len),
2665                     bios_len,
2666                     MemoryRegionOptions {
2667                         align: 0,
2668                         purpose: MemoryRegionPurpose::Bios,
2669                         file_backed: None,
2670                     },
2671                 ),
2672             ]
2673         );
2674     }
2675 
2676     #[test]
regions_gt_4gb_bios()2677     fn regions_gt_4gb_bios() {
2678         let arch_memory_layout = setup();
2679         let bios_len = 1 * MB;
2680         let regions = arch_memory_regions(&arch_memory_layout, 4 * GB + 0x8000, Some(bios_len));
2681         assert_eq!(
2682             regions,
2683             [
2684                 (
2685                     GuestAddress(0),
2686                     640 * KB,
2687                     MemoryRegionOptions {
2688                         align: 0,
2689                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2690                         file_backed: None,
2691                     },
2692                 ),
2693                 (
2694                     GuestAddress(640 * KB),
2695                     384 * KB,
2696                     MemoryRegionOptions {
2697                         align: 0,
2698                         purpose: MemoryRegionPurpose::ReservedMemory,
2699                         file_backed: None,
2700                     },
2701                 ),
2702                 (
2703                     GuestAddress(1 * MB),
2704                     2 * GB - 1 * MB,
2705                     MemoryRegionOptions {
2706                         align: 0,
2707                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2708                         file_backed: None,
2709                     },
2710                 ),
2711                 (
2712                     GuestAddress(4 * GB - bios_len),
2713                     bios_len,
2714                     MemoryRegionOptions {
2715                         align: 0,
2716                         purpose: MemoryRegionPurpose::Bios,
2717                         file_backed: None,
2718                     },
2719                 ),
2720                 (
2721                     GuestAddress(4 * GB),
2722                     2 * GB + 0x8000,
2723                     MemoryRegionOptions {
2724                         align: 0,
2725                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2726                         file_backed: None,
2727                     },
2728                 ),
2729             ]
2730         );
2731     }
2732 
2733     #[test]
regions_eq_4gb_nobios()2734     fn regions_eq_4gb_nobios() {
2735         let arch_memory_layout = setup();
2736         // Test with exact size of 4GB - the overhead.
2737         let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, /* bios_size */ None);
2738         assert_eq!(
2739             regions,
2740             [
2741                 (
2742                     GuestAddress(0),
2743                     640 * KB,
2744                     MemoryRegionOptions {
2745                         align: 0,
2746                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2747                         file_backed: None,
2748                     },
2749                 ),
2750                 (
2751                     GuestAddress(640 * KB),
2752                     384 * KB,
2753                     MemoryRegionOptions {
2754                         align: 0,
2755                         purpose: MemoryRegionPurpose::ReservedMemory,
2756                         file_backed: None,
2757                     },
2758                 ),
2759                 (
2760                     GuestAddress(1 * MB),
2761                     2 * GB - 1 * MB,
2762                     MemoryRegionOptions {
2763                         align: 0,
2764                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2765                         file_backed: None,
2766                     },
2767                 )
2768             ]
2769         );
2770     }
2771 
2772     #[test]
regions_eq_4gb_bios()2773     fn regions_eq_4gb_bios() {
2774         let arch_memory_layout = setup();
2775         // Test with exact size of 4GB - the overhead.
2776         let bios_len = 1 * MB;
2777         let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, Some(bios_len));
2778         assert_eq!(
2779             regions,
2780             [
2781                 (
2782                     GuestAddress(0),
2783                     640 * KB,
2784                     MemoryRegionOptions {
2785                         align: 0,
2786                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2787                         file_backed: None,
2788                     },
2789                 ),
2790                 (
2791                     GuestAddress(640 * KB),
2792                     384 * KB,
2793                     MemoryRegionOptions {
2794                         align: 0,
2795                         purpose: MemoryRegionPurpose::ReservedMemory,
2796                         file_backed: None,
2797                     },
2798                 ),
2799                 (
2800                     GuestAddress(1 * MB),
2801                     2 * GB - 1 * MB,
2802                     MemoryRegionOptions {
2803                         align: 0,
2804                         purpose: MemoryRegionPurpose::GuestMemoryRegion,
2805                         file_backed: None,
2806                     },
2807                 ),
2808                 (
2809                     GuestAddress(4 * GB - bios_len),
2810                     bios_len,
2811                     MemoryRegionOptions {
2812                         align: 0,
2813                         purpose: MemoryRegionPurpose::Bios,
2814                         file_backed: None,
2815                     },
2816                 ),
2817             ]
2818         );
2819     }
2820 
2821     #[test]
check_pci_mmio_layout()2822     fn check_pci_mmio_layout() {
2823         let arch_memory_layout = setup();
2824 
2825         assert_eq!(arch_memory_layout.pci_mmio_before_32bit.start, 2 * GB);
2826         assert_eq!(arch_memory_layout.pcie_cfg_mmio.start, 3 * GB);
2827         assert_eq!(arch_memory_layout.pcie_cfg_mmio.len().unwrap(), 256 * MB);
2828     }
2829 
2830     #[test]
check_32bit_gap_size_alignment()2831     fn check_32bit_gap_size_alignment() {
2832         let arch_memory_layout = setup();
2833         // pci_mmio_before_32bit is 256 MB aligned to be friendly for MTRR mappings.
2834         assert_eq!(
2835             arch_memory_layout.pci_mmio_before_32bit.start % (256 * MB),
2836             0
2837         );
2838     }
2839 
2840     #[test]
write_setup_data_empty()2841     fn write_setup_data_empty() {
2842         let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();
2843         let setup_data = [];
2844         let setup_data_addr = write_setup_data(
2845             &mem,
2846             GuestAddress(0x1000),
2847             GuestAddress(0x2000),
2848             &setup_data,
2849         )
2850         .expect("write_setup_data");
2851         assert_eq!(setup_data_addr, None);
2852     }
2853 
2854     #[test]
write_setup_data_two_of_them()2855     fn write_setup_data_two_of_them() {
2856         let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();
2857 
2858         let entry1_addr = GuestAddress(0x1000);
2859         let entry1_next_addr = entry1_addr;
2860         let entry1_len_addr = entry1_addr.checked_add(12).unwrap();
2861         let entry1_data_addr = entry1_addr.checked_add(16).unwrap();
2862         let entry1_data = [0x55u8; 13];
2863         let entry1_size = (size_of::<setup_data_hdr>() + entry1_data.len()) as u64;
2864         let entry1_align = 3;
2865 
2866         let entry2_addr = GuestAddress(entry1_addr.offset() + entry1_size + entry1_align);
2867         let entry2_next_addr = entry2_addr;
2868         let entry2_len_addr = entry2_addr.checked_add(12).unwrap();
2869         let entry2_data_addr = entry2_addr.checked_add(16).unwrap();
2870         let entry2_data = [0xAAu8; 9];
2871 
2872         let setup_data = [
2873             SetupData {
2874                 data: entry1_data.to_vec(),
2875                 type_: SetupDataType::Dtb,
2876             },
2877             SetupData {
2878                 data: entry2_data.to_vec(),
2879                 type_: SetupDataType::Dtb,
2880             },
2881         ];
2882 
2883         let setup_data_head_addr = write_setup_data(
2884             &mem,
2885             GuestAddress(0x1000),
2886             GuestAddress(0x2000),
2887             &setup_data,
2888         )
2889         .expect("write_setup_data");
2890         assert_eq!(setup_data_head_addr, Some(entry1_addr));
2891 
2892         assert_eq!(
2893             mem.read_obj_from_addr::<u64>(entry1_next_addr).unwrap(),
2894             entry2_addr.offset()
2895         );
2896         assert_eq!(
2897             mem.read_obj_from_addr::<u32>(entry1_len_addr).unwrap(),
2898             entry1_data.len() as u32
2899         );
2900         assert_eq!(
2901             mem.read_obj_from_addr::<[u8; 13]>(entry1_data_addr)
2902                 .unwrap(),
2903             entry1_data
2904         );
2905 
2906         assert_eq!(mem.read_obj_from_addr::<u64>(entry2_next_addr).unwrap(), 0);
2907         assert_eq!(
2908             mem.read_obj_from_addr::<u32>(entry2_len_addr).unwrap(),
2909             entry2_data.len() as u32
2910         );
2911         assert_eq!(
2912             mem.read_obj_from_addr::<[u8; 9]>(entry2_data_addr).unwrap(),
2913             entry2_data
2914         );
2915     }
2916 
2917     #[test]
cmdline_overflow()2918     fn cmdline_overflow() {
2919         const MEM_SIZE: u64 = 0x1000;
2920         let gm = GuestMemory::new(&[(GuestAddress(0x0), MEM_SIZE)]).unwrap();
2921         let mut cmdline = kernel_cmdline::Cmdline::new();
2922         cmdline.insert_str("12345").unwrap();
2923         let cmdline_address = GuestAddress(MEM_SIZE - 5);
2924         let err =
2925             X8664arch::load_cmdline(&gm, cmdline_address, cmdline, CMDLINE_MAX_SIZE as usize - 1)
2926                 .unwrap_err();
2927         assert!(matches!(err, Error::CommandLineOverflow));
2928     }
2929 
2930     #[test]
cmdline_write_end()2931     fn cmdline_write_end() {
2932         const MEM_SIZE: u64 = 0x1000;
2933         let gm = GuestMemory::new(&[(GuestAddress(0x0), MEM_SIZE)]).unwrap();
2934         let mut cmdline = kernel_cmdline::Cmdline::new();
2935         cmdline.insert_str("1234").unwrap();
2936         let mut cmdline_address = GuestAddress(45);
2937         X8664arch::load_cmdline(&gm, cmdline_address, cmdline, CMDLINE_MAX_SIZE as usize - 1)
2938             .unwrap();
2939         let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2940         assert_eq!(val, b'1');
2941         cmdline_address = cmdline_address.unchecked_add(1);
2942         let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2943         assert_eq!(val, b'2');
2944         cmdline_address = cmdline_address.unchecked_add(1);
2945         let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2946         assert_eq!(val, b'3');
2947         cmdline_address = cmdline_address.unchecked_add(1);
2948         let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2949         assert_eq!(val, b'4');
2950         cmdline_address = cmdline_address.unchecked_add(1);
2951         let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2952         assert_eq!(val, b'\0');
2953     }
2954 }
2955