1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! x86 architecture support.
6
7 #![cfg(target_arch = "x86_64")]
8
9 mod fdt;
10
11 #[cfg(feature = "gdb")]
12 mod gdb;
13
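// `setup_data` type identifiers from the Linux boot protocol
// (arch/x86/include/uapi/asm/bootparam.h).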
14 const SETUP_DTB: u32 = 2;
15 const SETUP_RNG_SEED: u32 = 9;
16
17 #[allow(dead_code)]
18 #[allow(non_upper_case_globals)]
19 #[allow(non_camel_case_types)]
20 #[allow(non_snake_case)]
21 pub mod bootparam;
22
23 #[allow(dead_code)]
24 #[allow(non_upper_case_globals)]
25 mod msr_index;
26
27 #[allow(dead_code)]
28 #[allow(non_upper_case_globals)]
29 #[allow(non_camel_case_types)]
30 #[allow(clippy::all)]
31 mod mpspec;
32
33 pub mod multiboot_spec;
34
35 pub mod acpi;
36 mod bzimage;
37 pub mod cpuid;
38 mod gdt;
39 pub mod interrupts;
40 pub mod mptable;
41 pub mod regs;
42 pub mod smbios;
43
44 use std::arch::x86_64::CpuidResult;
45 use std::collections::BTreeMap;
46 use std::fmt;
47 use std::fs::File;
48 use std::io;
49 use std::io::Write;
50 use std::mem;
51 use std::path::PathBuf;
52 use std::sync::mpsc;
53 use std::sync::Arc;
54
55 use acpi_tables::aml;
56 use acpi_tables::aml::Aml;
57 use acpi_tables::sdt::SDT;
58 use anyhow::Context;
59 use arch::get_serial_cmdline;
60 use arch::serial::SerialDeviceInfo;
61 use arch::CpuSet;
62 use arch::DtbOverlay;
63 use arch::FdtPosition;
64 use arch::GetSerialCmdlineError;
65 use arch::MemoryRegionConfig;
66 use arch::PciConfig;
67 use arch::RunnableLinuxVm;
68 use arch::VmComponents;
69 use arch::VmImage;
70 use base::debug;
71 use base::info;
72 use base::warn;
73 #[cfg(any(target_os = "android", target_os = "linux"))]
74 use base::AsRawDescriptors;
75 use base::Event;
76 use base::FileGetLen;
77 use base::FileReadWriteAtVolatile;
78 use base::SendTube;
79 use base::Tube;
80 use base::TubeError;
81 use chrono::Utc;
82 pub use cpuid::adjust_cpuid;
83 pub use cpuid::CpuIdContext;
84 use devices::acpi::PM_WAKEUP_GPIO;
85 use devices::Bus;
86 use devices::BusDevice;
87 use devices::BusDeviceObj;
88 use devices::BusResumeDevice;
89 use devices::BusType;
90 use devices::Debugcon;
91 use devices::FwCfgParameters;
92 use devices::IrqChip;
93 use devices::IrqChipX86_64;
94 use devices::IrqEventSource;
95 use devices::PciAddress;
96 use devices::PciConfigIo;
97 use devices::PciConfigMmio;
98 use devices::PciDevice;
99 use devices::PciInterruptPin;
100 use devices::PciRoot;
101 use devices::PciRootCommand;
102 use devices::PciVirtualConfigMmio;
103 use devices::Pflash;
104 #[cfg(any(target_os = "android", target_os = "linux"))]
105 use devices::ProxyDevice;
106 use devices::Serial;
107 use devices::SerialHardware;
108 use devices::SerialParameters;
109 use devices::VirtualPmc;
110 use devices::FW_CFG_BASE_PORT;
111 use devices::FW_CFG_MAX_FILE_SLOTS;
112 use devices::FW_CFG_WIDTH;
113 use hypervisor::CpuConfigX86_64;
114 use hypervisor::Hypervisor;
115 use hypervisor::HypervisorX86_64;
116 use hypervisor::ProtectionType;
117 use hypervisor::VcpuInitX86_64;
118 use hypervisor::VcpuX86_64;
119 use hypervisor::Vm;
120 use hypervisor::VmCap;
121 use hypervisor::VmX86_64;
122 #[cfg(feature = "seccomp_trace")]
123 use jail::read_jail_addr;
124 #[cfg(windows)]
125 use jail::FakeMinijailStub as Minijail;
126 #[cfg(any(target_os = "android", target_os = "linux"))]
127 use minijail::Minijail;
128 use mptable::MPTABLE_START;
129 use multiboot_spec::MultibootInfo;
130 use multiboot_spec::MultibootMmapEntry;
131 use multiboot_spec::MULTIBOOT_BOOTLOADER_MAGIC;
132 use rand::rngs::OsRng;
133 use rand::RngCore;
134 use remain::sorted;
135 use resources::AddressRange;
136 use resources::SystemAllocator;
137 use resources::SystemAllocatorConfig;
138 use sync::Condvar;
139 use sync::Mutex;
140 use thiserror::Error;
141 use vm_control::BatControl;
142 use vm_control::BatteryType;
143 use vm_memory::GuestAddress;
144 use vm_memory::GuestMemory;
145 use vm_memory::GuestMemoryError;
146 use vm_memory::MemoryRegionOptions;
147 use vm_memory::MemoryRegionPurpose;
148 use zerocopy::FromBytes;
149 use zerocopy::Immutable;
150 use zerocopy::IntoBytes;
151 use zerocopy::KnownLayout;
152
153 use crate::bootparam::boot_params;
154 use crate::bootparam::setup_header;
155 use crate::bootparam::XLF_CAN_BE_LOADED_ABOVE_4G;
156 use crate::cpuid::EDX_HYBRID_CPU_SHIFT;
157
158 #[sorted]
159 #[derive(Error, Debug)]
160 pub enum Error {
161 #[error("error allocating a single gpe")]
162 AllocateGpe,
163 #[error("error allocating IO resource: {0}")]
164 AllocateIOResouce(resources::Error),
165 #[error("error allocating a single irq")]
166 AllocateIrq,
167 #[error("unable to clone an Event: {0}")]
168 CloneEvent(base::Error),
169 #[error("failed to clone IRQ chip: {0}")]
170 CloneIrqChip(base::Error),
171 #[cfg(any(target_os = "android", target_os = "linux"))]
172 #[error("failed to clone jail: {0}")]
173 CloneJail(minijail::Error),
174 #[error("unable to clone a Tube: {0}")]
175 CloneTube(TubeError),
176 #[error("the given kernel command line was invalid: {0}")]
177 Cmdline(kernel_cmdline::Error),
178 #[error("failed writing command line to guest memory")]
179 CommandLineCopy,
180 #[error("command line overflowed guest memory")]
181 CommandLineOverflow,
182 #[error("failed to configure hotplugged pci device: {0}")]
183 ConfigurePciDevice(arch::DeviceRegistrationError),
184 #[error("bad PCI ECAM configuration: {0}")]
185 ConfigurePciEcam(String),
186 #[error("bad PCI mem configuration: {0}")]
187 ConfigurePciMem(String),
188 #[error("failed to configure segment registers: {0}")]
189 ConfigureSegments(regs::Error),
190 #[error("error configuring the system")]
191 ConfigureSystem,
192 #[error("unable to create ACPI tables")]
193 CreateAcpi,
194 #[error("unable to create battery devices: {0}")]
195 CreateBatDevices(arch::DeviceRegistrationError),
196 #[error("could not create debugcon device: {0}")]
197 CreateDebugconDevice(devices::SerialError),
198 #[error("unable to make an Event: {0}")]
199 CreateEvent(base::Error),
200 #[error("failed to create fdt: {0}")]
201 CreateFdt(cros_fdt::Error),
202 #[error("failed to create fw_cfg device: {0}")]
203 CreateFwCfgDevice(devices::FwCfgError),
204 #[error("failed to create IOAPIC device: {0}")]
205 CreateIoapicDevice(base::Error),
206 #[error("failed to create a PCI root hub: {0}")]
207 CreatePciRoot(arch::DeviceRegistrationError),
208 #[error("unable to create PIT: {0}")]
209 CreatePit(base::Error),
210 #[error("unable to make PIT device: {0}")]
211 CreatePitDevice(devices::PitError),
212 #[cfg(any(target_os = "android", target_os = "linux"))]
213 #[error("unable to create proxy device: {0}")]
214 CreateProxyDevice(devices::ProxyError),
215 #[error("unable to create serial devices: {0}")]
216 CreateSerialDevices(arch::DeviceRegistrationError),
217 #[error("failed to create socket: {0}")]
218 CreateSocket(io::Error),
219 #[error("failed to create tube: {0}")]
220 CreateTube(base::TubeError),
221 #[error("failed to create VCPU: {0}")]
222 CreateVcpu(base::Error),
223 #[error("invalid e820 setup params")]
224 E820Configuration,
225 #[error("failed to enable singlestep execution: {0}")]
226 EnableSinglestep(base::Error),
227 #[error("failed to enable split irqchip: {0}")]
228 EnableSplitIrqchip(base::Error),
229 #[error("failed to get serial cmdline: {0}")]
230 GetSerialCmdline(GetSerialCmdlineError),
231 #[error("failed to insert device onto bus: {0}")]
232 InsertBus(devices::BusError),
233 #[error("invalid CPU config parameters")]
234 InvalidCpuConfig,
235 #[error("the kernel extends past the end of RAM")]
236 KernelOffsetPastEnd,
237 #[error("error loading bios: {0}")]
238 LoadBios(io::Error),
239 #[error("error loading kernel bzImage: {0}")]
240 LoadBzImage(bzimage::Error),
241 #[error("error loading custom pVM firmware: {0}")]
242 LoadCustomPvmFw(arch::LoadImageError),
243 #[error("error loading initrd: {0}")]
244 LoadInitrd(arch::LoadImageError),
245 #[error("error loading Kernel: {0}")]
246 LoadKernel(kernel_loader::Error),
247 #[error("error loading pflash: {0}")]
248 LoadPflash(io::Error),
249 #[error("error loading pVM firmware: {0}")]
250 LoadPvmFw(base::Error),
251 #[error("error in multiboot_info setup")]
252 MultibootInfoSetup,
253 #[error("error translating address: Page not present")]
254 PageNotPresent,
255 #[error("pci mmio overlaps with pVM firmware memory")]
256 PciMmioOverlapPvmFw,
257 #[error("pVM firmware not supported when bios is used on x86_64")]
258 PvmFwBiosUnsupported,
259 #[error("error reading guest memory {0}")]
260 ReadingGuestMemory(vm_memory::GuestMemoryError),
261 #[error("single register read not supported on x86_64")]
262 ReadRegIsUnsupported,
263 #[error("error reading CPU registers {0}")]
264 ReadRegs(base::Error),
265 #[error("error registering an IrqFd: {0}")]
266 RegisterIrqfd(base::Error),
267 #[error("error registering virtual socket device: {0}")]
268 RegisterVsock(arch::DeviceRegistrationError),
269 #[error("error reserved pcie config mmio")]
270 ReservePcieCfgMmio(resources::Error),
271 #[error("failed to set a hardware breakpoint: {0}")]
272 SetHwBreakpoint(base::Error),
273 #[error("failed to set identity map addr: {0}")]
274 SetIdentityMapAddr(base::Error),
275 #[error("failed to set interrupts: {0}")]
276 SetLint(interrupts::Error),
277 #[error("failed to set tss addr: {0}")]
278 SetTssAddr(base::Error),
279 #[error("failed to set up cmos: {0}")]
280 SetupCmos(anyhow::Error),
281 #[error("failed to set up cpuid: {0}")]
282 SetupCpuid(cpuid::Error),
283 #[error("setup data too large")]
284 SetupDataTooLarge,
285 #[error("failed to set up FPU: {0}")]
286 SetupFpu(base::Error),
287 #[error("failed to set up guest memory: {0}")]
288 SetupGuestMemory(GuestMemoryError),
289 #[error("failed to set up mptable: {0}")]
290 SetupMptable(mptable::Error),
291 #[error("failed to set up MSRs: {0}")]
292 SetupMsrs(base::Error),
293 #[error("failed to set up page tables: {0}")]
294 SetupPageTables(regs::Error),
295 #[error("failed to set up pflash: {0}")]
296 SetupPflash(anyhow::Error),
297 #[error("failed to set up registers: {0}")]
298 SetupRegs(regs::Error),
299 #[error("failed to set up SMBIOS: {0}")]
300 SetupSmbios(smbios::Error),
301 #[error("failed to set up sregs: {0}")]
302 SetupSregs(base::Error),
303 #[error("too many vCPUs")]
304 TooManyVcpus,
305 #[error("failed to translate virtual address")]
306 TranslatingVirtAddr,
307 #[error("protected VMs not supported on x86_64")]
308 UnsupportedProtectionType,
309 #[error("single register write not supported on x86_64")]
310 WriteRegIsUnsupported,
311 #[error("error writing CPU registers {0}")]
312 WriteRegs(base::Error),
313 #[error("error writing guest memory {0}")]
314 WritingGuestMemory(GuestMemoryError),
315 #[error("error writing setup_data: {0}")]
316 WritingSetupData(GuestMemoryError),
317 #[error("the zero page extends past the end of guest_mem")]
318 ZeroPagePastRamEnd,
319 #[error("error writing the zero page of guest memory")]
320 ZeroPageSetup,
321 }
322
323 pub type Result<T> = std::result::Result<T, Error>;
324
325 pub struct X8664arch;
326
327 // Like `bootparam::setup_data` without the incomplete array field at the end, which allows us to
328 // safely implement Copy, Clone
329 #[repr(C)]
330 #[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
331 struct setup_data_hdr {
332 pub next: u64,
333 pub type_: u32,
334 pub len: u32,
335 }
336
337 #[repr(u32)]
338 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
339 pub enum SetupDataType {
340 Dtb = SETUP_DTB,
341 RngSeed = SETUP_RNG_SEED,
342 }
343
344 /// A single entry to be inserted in the bootparam `setup_data` linked list.
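///
/// # Example (illustrative sketch; `dtb_bytes` is an assumed `Vec<u8>` containing a flattened
/// device tree blob)
///
/// ```ignore
/// let entry = SetupData {
///     data: dtb_bytes,
///     type_: SetupDataType::Dtb,
/// };
/// ```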
345 pub struct SetupData {
346 pub data: Vec<u8>,
347 pub type_: SetupDataType,
348 }
349
350 #[derive(Copy, Clone, Debug)]
351 enum E820Type {
352 Ram = 0x01,
353 Reserved = 0x2,
354 }
355
356 #[derive(Copy, Clone, Debug)]
357 struct E820Entry {
358 pub address: GuestAddress,
359 pub len: u64,
360 pub mem_type: E820Type,
361 }
362
363 const KB: u64 = 1 << 10;
364 const MB: u64 = 1 << 20;
365 const GB: u64 = 1 << 30;
366
367 pub const BOOT_STACK_POINTER: u64 = 0x8000;
368 const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32;
369 // Make sure it is aligned to 256 MB for MTRR convenience.
370 const MEM_32BIT_GAP_SIZE: u64 = 768 * MB;
371 // Reserved memory for nand_bios/LAPIC/IOAPIC/HPET/.....
372 const RESERVED_MEM_SIZE: u64 = 0x800_0000;
373 const DEFAULT_PCI_MEM_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
374 // Reserve 64MB for pcie enhanced configuration
375 const DEFAULT_PCIE_CFG_MMIO_SIZE: u64 = 0x400_0000;
376 const DEFAULT_PCIE_CFG_MMIO_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
377 const DEFAULT_PCIE_CFG_MMIO_START: u64 = DEFAULT_PCIE_CFG_MMIO_END - DEFAULT_PCIE_CFG_MMIO_SIZE + 1;
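// With the constants above, the default PCIe ECAM window is 0xf400_0000..=0xf7ff_ffff and
// DEFAULT_PCI_MEM_END is 0xf7ff_ffff, i.e. just below the 128 MB reserved region under 4 GB.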
378 // Linux (with 4-level paging) has a physical memory limit of 46 bits (64 TiB).
379 const HIGH_MMIO_MAX_END: u64 = (1u64 << 46) - 1;
380 pub const KERNEL_32BIT_ENTRY_OFFSET: u64 = 0x0;
381 pub const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
382 pub const MULTIBOOT_INFO_OFFSET: u64 = 0x6000;
383 pub const MULTIBOOT_INFO_SIZE: u64 = 0x1000;
384 pub const ZERO_PAGE_OFFSET: u64 = 0x7000;
385 // Set BIOS max size to 16M: this is used only when `unrestricted guest` is disabled
386 const BIOS_MAX_SIZE: u64 = 0x1000000;
387
388 pub const KERNEL_START_OFFSET: u64 = 0x20_0000;
389 const CMDLINE_OFFSET: u64 = 0x2_0000;
390 const CMDLINE_MAX_SIZE: u64 = 0x800; // including terminating zero
391 const SETUP_DATA_START: u64 = CMDLINE_OFFSET + CMDLINE_MAX_SIZE;
392 const SETUP_DATA_END: u64 = MPTABLE_START;
393 const X86_64_SERIAL_1_3_IRQ: u32 = 4;
394 const X86_64_SERIAL_2_4_IRQ: u32 = 3;
395 // X86_64_SCI_IRQ is used to fill the ACPI FACP table.
396 // The SCI IRQ is best kept as a legacy IRQ number, i.e. less
397 // than 16; most platforms actually use the fixed IRQ number 9.
398 // We reserve IRQ number 5 for SCI here and let the
399 // other devices start allocating from the next
400 // available IRQ number.
401 pub const X86_64_SCI_IRQ: u32 = 5;
402 // The CMOS RTC uses IRQ 8; start allocating IRQs at 9.
403 pub const X86_64_IRQ_BASE: u32 = 9;
404 const ACPI_HI_RSDP_WINDOW_BASE: u64 = 0x000E_0000;
405
406 // pVM firmware memory. Should be within the low 4GB, so that it is identity-mapped
407 // by setup_page_tables() when a protected VM boots in long mode, since the pVM firmware is
408 // the VM entry point.
409 const PROTECTED_VM_FW_MAX_SIZE: u64 = 0x40_0000;
410 // Load the pVM firmware just below 2 GB to allow use of `-mcmodel=small`.
411 const PROTECTED_VM_FW_START: u64 = 0x8000_0000 - PROTECTED_VM_FW_MAX_SIZE;
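// i.e. the pVM firmware occupies 0x7fc0_0000..0x8000_0000 (the 4 MB just below 2 GB).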
412
413 #[derive(Debug, PartialEq, Eq)]
414 pub enum CpuManufacturer {
415 Intel,
416 Amd,
417 Unknown,
418 }
419
420 pub fn get_cpu_manufacturer() -> CpuManufacturer {
421 cpuid::cpu_manufacturer()
422 }
423
424 pub struct ArchMemoryLayout {
425 // the pci mmio range below 4G
426 pci_mmio_before_32bit: AddressRange,
427 // the pcie cfg mmio range
428 pcie_cfg_mmio: AddressRange,
429 // the pVM firmware memory (if running a protected VM)
430 pvmfw_mem: Option<AddressRange>,
431 }
432
433 pub fn create_arch_memory_layout(
434 pci_config: &PciConfig,
435 has_protected_vm_firmware: bool,
436 ) -> Result<ArchMemoryLayout> {
437 // the max bus number is 256 and each bus occupies 1 MB, so the max pcie cfg mmio size is 256 MB
438 const MAX_PCIE_ECAM_SIZE: u64 = 256 * MB;
439 let pcie_cfg_mmio = match pci_config.ecam {
440 Some(MemoryRegionConfig {
441 start,
442 size: Some(size),
443 }) => AddressRange::from_start_and_size(start, size.min(MAX_PCIE_ECAM_SIZE)).unwrap(),
444 Some(MemoryRegionConfig { start, size: None }) => {
445 AddressRange::from_start_and_end(start, DEFAULT_PCIE_CFG_MMIO_END)
446 }
447 None => {
448 AddressRange::from_start_and_end(DEFAULT_PCIE_CFG_MMIO_START, DEFAULT_PCIE_CFG_MMIO_END)
449 }
450 };
451 if pcie_cfg_mmio.start % pcie_cfg_mmio.len().unwrap() != 0
452 || pcie_cfg_mmio.start % MB != 0
453 || pcie_cfg_mmio.len().unwrap() % MB != 0
454 {
455 return Err(Error::ConfigurePciEcam(
456 "base and len must be aligned to 1MB and base must be a multiple of len".to_string(),
457 ));
458 }
459 if pcie_cfg_mmio.end >= 0x1_0000_0000 {
460 return Err(Error::ConfigurePciEcam(
461 "end address can't go beyond 4G".to_string(),
462 ));
463 }
464
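// With no user override, this defaults to 0xd000_0000..=0xf7ff_ffff: the 768 MB
// gap below 4 GB minus the 128 MB reserved region at its top.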
465 let pci_mmio_before_32bit = match pci_config.mem {
466 Some(MemoryRegionConfig {
467 start,
468 size: Some(size),
469 }) => AddressRange::from_start_and_size(start, size)
470 .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
471 Some(MemoryRegionConfig { start, size: None }) => {
472 AddressRange::from_start_and_end(start, DEFAULT_PCI_MEM_END)
473 }
474 None => AddressRange::from_start_and_end(
475 pcie_cfg_mmio
476 .start
477 .min(FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE),
478 DEFAULT_PCI_MEM_END,
479 ),
480 };
481
482 let pvmfw_mem = if has_protected_vm_firmware {
483 let range = AddressRange {
484 start: PROTECTED_VM_FW_START,
485 end: PROTECTED_VM_FW_START + PROTECTED_VM_FW_MAX_SIZE - 1,
486 };
487 if !pci_mmio_before_32bit.intersect(range).is_empty() {
488 return Err(Error::PciMmioOverlapPvmFw);
489 }
490
491 Some(range)
492 } else {
493 None
494 };
495
496 Ok(ArchMemoryLayout {
497 pci_mmio_before_32bit,
498 pcie_cfg_mmio,
499 pvmfw_mem,
500 })
501 }
502
503 /// The x86 reset vector for i386+ and x86_64 puts the processor into an "unreal mode" where it
504 /// can access the last 1 MB of the 32-bit address space in 16-bit mode, and starts the instruction
505 /// pointer at the effective physical address 0xFFFF_FFF0.
506 fn bios_start(bios_size: u64) -> GuestAddress {
507 GuestAddress(FIRST_ADDR_PAST_32BITS - bios_size)
508 }
509
510 fn identity_map_addr_start() -> GuestAddress {
511 // Set Identity map address 4 pages before the max BIOS size
512 GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_MAX_SIZE - 4 * 0x1000)
513 }
514
515 fn tss_addr_start() -> GuestAddress {
516 // Set TSS address one page after identity map address
517 GuestAddress(identity_map_addr_start().offset() + 0x1000)
518 }
519
520 fn tss_addr_end() -> GuestAddress {
521 // Set TSS address section to have 3 pages
522 GuestAddress(tss_addr_start().offset() + 0x3000)
523 }
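// Together, the identity map page and the TSS occupy 0xfeff_c000..0xff00_0000; this range is
// reported as reserved in the e820 map (see generate_e820_memory_map below).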
524
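/// Populates the Linux boot_params "zero page" with the command line pointer, the head of the
/// setup_data list, the initrd location/size, and the e820 memory map, then writes it to guest
/// memory at ZERO_PAGE_OFFSET.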
525 fn configure_boot_params(
526 guest_mem: &GuestMemory,
527 cmdline_addr: GuestAddress,
528 setup_data: Option<GuestAddress>,
529 initrd: Option<(GuestAddress, usize)>,
530 mut params: boot_params,
531 e820_entries: &[E820Entry],
532 ) -> Result<()> {
533 const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
534 const KERNEL_HDR_MAGIC: u32 = 0x5372_6448;
535 const KERNEL_LOADER_OTHER: u8 = 0xff;
536 const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x100_0000; // Must be non-zero.
537
538 params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
539 params.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
540 params.hdr.header = KERNEL_HDR_MAGIC;
541 params.hdr.cmd_line_ptr = cmdline_addr.offset() as u32;
542 params.ext_cmd_line_ptr = (cmdline_addr.offset() >> 32) as u32;
543 params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
544 if let Some(setup_data) = setup_data {
545 params.hdr.setup_data = setup_data.offset();
546 }
547 if let Some((initrd_addr, initrd_size)) = initrd {
548 params.hdr.ramdisk_image = initrd_addr.offset() as u32;
549 params.ext_ramdisk_image = (initrd_addr.offset() >> 32) as u32;
550 params.hdr.ramdisk_size = initrd_size as u32;
551 params.ext_ramdisk_size = (initrd_size as u64 >> 32) as u32;
552 }
553
554 if e820_entries.len() >= params.e820_table.len() {
555 return Err(Error::E820Configuration);
556 }
557
558 for (src, dst) in e820_entries.iter().zip(params.e820_table.iter_mut()) {
559 dst.addr = src.address.offset();
560 dst.size = src.len;
561 dst.type_ = src.mem_type as u32;
562 }
563 params.e820_entries = e820_entries.len() as u8;
564
565 let zero_page_addr = GuestAddress(ZERO_PAGE_OFFSET);
566 if !guest_mem.is_valid_range(zero_page_addr, mem::size_of::<boot_params>() as u64) {
567 return Err(Error::ZeroPagePastRamEnd);
568 }
569
570 guest_mem
571 .write_obj_at_addr(params, zero_page_addr)
572 .map_err(|_| Error::ZeroPageSetup)?;
573
574 Ok(())
575 }
576
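/// Builds a Multiboot v1 information structure (memory sizes, memory map, command line pointer,
/// and boot loader name) and writes it, along with its auxiliary data, into guest memory at
/// MULTIBOOT_INFO_OFFSET.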
577 fn configure_multiboot_info(
578 guest_mem: &GuestMemory,
579 cmdline_addr: GuestAddress,
580 e820_entries: &[E820Entry],
581 ) -> Result<()> {
582 let mut multiboot_info = MultibootInfo {
583 ..Default::default()
584 };
585
586 // Extra Multiboot-related data is added directly after the info structure.
587 let mut multiboot_data_addr =
588 GuestAddress(MULTIBOOT_INFO_OFFSET + mem::size_of_val(&multiboot_info) as u64);
589 multiboot_data_addr = multiboot_data_addr
590 .align(16)
591 .ok_or(Error::MultibootInfoSetup)?;
592
593 // mem_lower is the amount of RAM below 1 MB, in units of KiB.
594 let mem_lower = guest_mem
595 .regions()
596 .filter(|r| {
597 r.options.purpose == MemoryRegionPurpose::GuestMemoryRegion
598 && r.guest_addr.offset() < 1 * MB
599 })
600 .map(|r| r.size as u64)
601 .sum::<u64>()
602 / KB;
603
604 // mem_upper is the amount of RAM above 1 MB up to the first memory hole, in units of KiB.
605 // We don't have the ISA 15-16 MB hole, so this includes all RAM from 1 MB up to the
606 // beginning of the PCI hole just below 4 GB.
607 let mem_upper = guest_mem
608 .regions()
609 .filter(|r| {
610 r.options.purpose == MemoryRegionPurpose::GuestMemoryRegion
611 && r.guest_addr.offset() >= 1 * MB
612 && r.guest_addr.offset() < 4 * GB
613 })
614 .map(|r| r.size as u64)
615 .sum::<u64>()
616 / KB;
617
618 multiboot_info.mem_lower = mem_lower as u32;
619 multiboot_info.mem_upper = mem_upper as u32;
620 multiboot_info.flags |= MultibootInfo::F_MEM;
621
622 // Memory map - convert from params.e820_table to Multiboot format.
623 let multiboot_mmap: Vec<MultibootMmapEntry> = e820_entries
624 .iter()
625 .map(|e820_entry| MultibootMmapEntry {
626 size: 20, // size of the entry, not including the size field itself
627 base_addr: e820_entry.address.offset(),
628 length: e820_entry.len,
629 type_: e820_entry.mem_type as u32,
630 })
631 .collect();
632 let multiboot_mmap_bytes = multiboot_mmap.as_bytes();
633 let multiboot_mmap_addr =
634 append_multiboot_info(guest_mem, &mut multiboot_data_addr, multiboot_mmap_bytes)?;
635 multiboot_info.mmap_addr = multiboot_mmap_addr.offset() as u32;
636 multiboot_info.mmap_length = multiboot_mmap_bytes.len() as u32;
637 multiboot_info.flags |= MultibootInfo::F_MMAP;
638
639 // Command line
640 multiboot_info.cmdline = cmdline_addr.offset() as u32;
641 multiboot_info.flags |= MultibootInfo::F_CMDLINE;
642
643 // Boot loader name
644 let boot_loader_name_addr =
645 append_multiboot_info(guest_mem, &mut multiboot_data_addr, b"crosvm\0")?;
646 multiboot_info.boot_loader_name = boot_loader_name_addr.offset() as u32;
647 multiboot_info.flags |= MultibootInfo::F_BOOT_LOADER_NAME;
648
649 guest_mem
650 .write_obj_at_addr(multiboot_info, GuestAddress(MULTIBOOT_INFO_OFFSET))
651 .map_err(|_| Error::MultibootInfoSetup)?;
652
653 Ok(())
654 }
655
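/// Writes `data` into guest memory at `*addr`, advances `*addr` past the data (16-byte aligned),
/// and returns the address at which the data was written. Fails if the write would run past the
/// region reserved for Multiboot information.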
656 fn append_multiboot_info(
657 guest_mem: &GuestMemory,
658 addr: &mut GuestAddress,
659 data: &[u8],
660 ) -> Result<GuestAddress> {
661 let data_addr = *addr;
662 let new_addr = addr
663 .checked_add(data.len() as u64)
664 .and_then(|a| a.align(16))
665 .ok_or(Error::MultibootInfoSetup)?;
666
667 // Make sure we don't write beyond the region reserved for Multiboot info.
668 if new_addr.offset() - MULTIBOOT_INFO_OFFSET > MULTIBOOT_INFO_SIZE {
669 return Err(Error::MultibootInfoSetup);
670 }
671
672 guest_mem
673 .write_all_at_addr(data, data_addr)
674 .map_err(|_| Error::MultibootInfoSetup)?;
675
676 *addr = new_addr;
677 Ok(data_addr)
678 }
679
680 /// Write setup_data entries in guest memory and link them together with the `next` field.
681 ///
682 /// Returns the guest address of the first entry in the setup_data list, if any.
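///
/// Each entry is written as a `setup_data_hdr` immediately followed by its payload. The `next`
/// field of each header holds the guest physical address of the following entry (0 terminates
/// the list), and every header is placed at a 64-bit aligned address.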
683 fn write_setup_data(
684 guest_mem: &GuestMemory,
685 setup_data_start: GuestAddress,
686 setup_data_end: GuestAddress,
687 setup_data: &[SetupData],
688 ) -> Result<Option<GuestAddress>> {
689 let mut setup_data_list_head = None;
690
691 // Place the first setup_data at the first 64-bit aligned offset following setup_data_start.
692 let mut setup_data_addr = setup_data_start.align(8).ok_or(Error::SetupDataTooLarge)?;
693
694 let mut entry_iter = setup_data.iter().peekable();
695 while let Some(entry) = entry_iter.next() {
696 if setup_data_list_head.is_none() {
697 setup_data_list_head = Some(setup_data_addr);
698 }
699
700 // Ensure the entry (header plus data) fits into guest memory.
701 let entry_size = (mem::size_of::<setup_data_hdr>() + entry.data.len()) as u64;
702 let entry_end = setup_data_addr
703 .checked_add(entry_size)
704 .ok_or(Error::SetupDataTooLarge)?;
705
706 if entry_end >= setup_data_end {
707 return Err(Error::SetupDataTooLarge);
708 }
709
710 let next_setup_data_addr = if entry_iter.peek().is_some() {
711 // Place the next setup_data at a 64-bit aligned address.
712 setup_data_addr
713 .checked_add(entry_size)
714 .and_then(|addr| addr.align(8))
715 .ok_or(Error::SetupDataTooLarge)?
716 } else {
717 // This is the final entry. Terminate the list with next == 0.
718 GuestAddress(0)
719 };
720
721 let hdr = setup_data_hdr {
722 next: next_setup_data_addr.offset(),
723 type_: entry.type_ as u32,
724 len: entry
725 .data
726 .len()
727 .try_into()
728 .map_err(|_| Error::SetupDataTooLarge)?,
729 };
730
731 guest_mem
732 .write_obj_at_addr(hdr, setup_data_addr)
733 .map_err(Error::WritingSetupData)?;
734 guest_mem
735 .write_all_at_addr(
736 &entry.data,
737 setup_data_addr.unchecked_add(mem::size_of::<setup_data_hdr>() as u64),
738 )
739 .map_err(Error::WritingSetupData)?;
740
741 setup_data_addr = next_setup_data_addr;
742 }
743
744 Ok(setup_data_list_head)
745 }
746
747 /// Generate a SETUP_RNG_SEED SetupData with random seed data.
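/// Recent Linux kernels mix this seed into their entropy pool early in boot (informational
/// note; nothing in this code depends on that behavior).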
748 fn setup_data_rng_seed() -> SetupData {
749 let mut data = vec![0u8; 256];
750 OsRng.fill_bytes(&mut data);
751 SetupData {
752 data,
753 type_: SetupDataType::RngSeed,
754 }
755 }
756
757 /// Add an e820 region to the e820 map.
758 fn add_e820_entry(
759 e820_entries: &mut Vec<E820Entry>,
760 range: AddressRange,
761 mem_type: E820Type,
762 ) -> Result<()> {
763 e820_entries.push(E820Entry {
764 address: GuestAddress(range.start),
765 len: range.len().ok_or(Error::E820Configuration)?,
766 mem_type,
767 });
768
769 Ok(())
770 }
771
772 /// Generate a memory map in INT 0x15 AX=0xE820 format.
773 fn generate_e820_memory_map(
774 arch_memory_layout: &ArchMemoryLayout,
775 guest_mem: &GuestMemory,
776 ) -> Result<Vec<E820Entry>> {
777 let mut e820_entries = Vec::new();
778
779 for r in guest_mem.regions() {
780 let range = AddressRange::from_start_and_size(r.guest_addr.offset(), r.size as u64)
781 .expect("invalid guest mem region");
782 let mem_type = match r.options.purpose {
783 MemoryRegionPurpose::Bios => E820Type::Reserved,
784 MemoryRegionPurpose::GuestMemoryRegion => E820Type::Ram,
785 // After the pVM firmware has jumped to the guest, the pVM firmware itself is no longer
786 // running, so its memory is reusable by the guest OS. Add this memory as RAM rather
787 // than Reserved.
788 MemoryRegionPurpose::ProtectedFirmwareRegion => E820Type::Ram,
789 MemoryRegionPurpose::ReservedMemory => E820Type::Reserved,
790 };
791 add_e820_entry(&mut e820_entries, range, mem_type)?;
792 }
793
794 let pcie_cfg_mmio_range = arch_memory_layout.pcie_cfg_mmio;
795 add_e820_entry(&mut e820_entries, pcie_cfg_mmio_range, E820Type::Reserved)?;
796
797 add_e820_entry(
798 &mut e820_entries,
799 X8664arch::get_pcie_vcfg_mmio_range(guest_mem, &pcie_cfg_mmio_range),
800 E820Type::Reserved,
801 )?;
802
803 // Reserve memory section for Identity map and TSS
804 add_e820_entry(
805 &mut e820_entries,
806 AddressRange {
807 start: identity_map_addr_start().offset(),
808 end: tss_addr_end().offset() - 1,
809 },
810 E820Type::Reserved,
811 )?;
812
813 Ok(e820_entries)
814 }
815
816 /// Returns a Vec of the valid memory addresses.
817 /// These should be used to configure the GuestMemory structure for the platform.
818 /// For x86_64 all addresses are valid from the start of the kernel except a
819 /// carve out at the end of 32bit address space.
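///
/// For example (illustrative), with the default layout (`pci_mmio_before_32bit` starting at
/// 0xd000_0000), a `mem_size` of 8 GiB, and no BIOS image, this returns:
///   (0x0, 640 KiB, GuestMemoryRegion), (0xa_0000, 384 KiB, ReservedMemory),
///   (0x10_0000, 0xcff0_0000, GuestMemoryRegion) for RAM up to the PCI hole, and
///   (0x1_0000_0000, 0x1_3000_0000, GuestMemoryRegion) for the remaining RAM above 4 GiB.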
820 pub fn arch_memory_regions(
821 arch_memory_layout: &ArchMemoryLayout,
822 mem_size: u64,
823 bios_size: Option<u64>,
824 ) -> Vec<(GuestAddress, u64, MemoryRegionOptions)> {
825 let mut regions = Vec::new();
826
827 // Some guest kernels expect a typical PC memory layout where the region between 640 KB and
828 // 1 MB is reserved for device memory/ROMs and get confused if there is a RAM region
829 // spanning this area, so we provide the traditional 640 KB low memory and 1 MB+
830 // high memory regions.
831 let mem_below_1m = 640 * KB;
832 regions.push((
833 GuestAddress(0),
834 mem_below_1m,
835 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
836 ));
837
838 // Reserved/BIOS data area between 640 KB and 1 MB.
839 // This needs to be backed by an actual GuestMemory region so we can write BIOS tables here, but
840 // it should be reported as "reserved" in the e820 memory map to match PC architecture
841 // expectations.
842 regions.push((
843 GuestAddress(640 * KB),
844 (1 * MB) - (640 * KB),
845 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ReservedMemory),
846 ));
847
848 // RAM between 1 MB and 4 GB
849 let mem_1m_to_4g = arch_memory_layout.pci_mmio_before_32bit.start.min(mem_size) - 1 * MB;
850 regions.push((
851 GuestAddress(1 * MB),
852 mem_1m_to_4g,
853 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
854 ));
855
856 // RAM above 4 GB
857 let mem_above_4g = mem_size.saturating_sub(1 * MB + mem_1m_to_4g);
858 if mem_above_4g > 0 {
859 regions.push((
860 GuestAddress(FIRST_ADDR_PAST_32BITS),
861 mem_above_4g,
862 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
863 ));
864 }
865
866 if let Some(bios_size) = bios_size {
867 regions.push((
868 bios_start(bios_size),
869 bios_size,
870 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::Bios),
871 ));
872 }
873
874 if let Some(pvmfw_mem) = arch_memory_layout.pvmfw_mem {
875 // Remove any areas of guest memory regions that overlap the pVM firmware range.
876 while let Some(overlapping_region_index) = regions.iter().position(|(addr, size, _opts)| {
877 let region_addr_range = AddressRange::from_start_and_size(addr.offset(), *size)
878 .expect("invalid GuestMemory range");
879 region_addr_range.overlaps(pvmfw_mem)
880 }) {
881 let overlapping_region = regions.swap_remove(overlapping_region_index);
882 let overlapping_region_range = AddressRange::from_start_and_size(
883 overlapping_region.0.offset(),
884 overlapping_region.1,
885 )
886 .unwrap();
887 let (first, second) = overlapping_region_range.non_overlapping_ranges(pvmfw_mem);
888 if !first.is_empty() {
889 regions.push((
890 GuestAddress(first.start),
891 first.len().unwrap(),
892 overlapping_region.2.clone(),
893 ));
894 }
895 if !second.is_empty() {
896 regions.push((
897 GuestAddress(second.start),
898 second.len().unwrap(),
899 overlapping_region.2,
900 ));
901 }
902 }
903
904 // Insert a region for the pVM firmware area.
905 regions.push((
906 GuestAddress(pvmfw_mem.start),
907 pvmfw_mem.len().expect("invalid pvmfw region"),
908 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
909 ));
910 }
911
912 regions.sort_unstable_by_key(|(addr, _, _)| *addr);
913
914 for (addr, size, options) in &regions {
915 debug!(
916 "{:#018x}-{:#018x} {:?}",
917 addr.offset(),
918 addr.offset() + size - 1,
919 options.purpose,
920 );
921 }
922
923 regions
924 }
925
926 impl arch::LinuxArch for X8664arch {
927 type Error = Error;
928 type ArchMemoryLayout = ArchMemoryLayout;
929
930 fn arch_memory_layout(
931 components: &VmComponents,
932 ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
933 create_arch_memory_layout(
934 &components.pci_config,
935 components.hv_cfg.protection_type.runs_firmware(),
936 )
937 }
938
939 fn guest_memory_layout(
940 components: &VmComponents,
941 arch_memory_layout: &Self::ArchMemoryLayout,
942 _hypervisor: &impl Hypervisor,
943 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
944 let bios_size = match &components.vm_image {
945 VmImage::Bios(bios_file) => Some(bios_file.metadata().map_err(Error::LoadBios)?.len()),
946 VmImage::Kernel(_) => None,
947 };
948
949 Ok(arch_memory_regions(
950 arch_memory_layout,
951 components.memory_size,
952 bios_size,
953 ))
954 }
955
956 fn get_system_allocator_config<V: Vm>(
957 vm: &V,
958 arch_memory_layout: &Self::ArchMemoryLayout,
959 ) -> SystemAllocatorConfig {
960 SystemAllocatorConfig {
961 io: Some(AddressRange {
962 start: 0xc000,
963 end: 0xffff,
964 }),
965 low_mmio: arch_memory_layout.pci_mmio_before_32bit,
966 high_mmio: Self::get_high_mmio_range(vm, arch_memory_layout),
967 platform_mmio: None,
968 first_irq: X86_64_IRQ_BASE,
969 }
970 }
971
972 fn build_vm<V, Vcpu>(
973 mut components: VmComponents,
974 arch_memory_layout: &Self::ArchMemoryLayout,
975 vm_evt_wrtube: &SendTube,
976 system_allocator: &mut SystemAllocator,
977 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
978 serial_jail: Option<Minijail>,
979 battery: (Option<BatteryType>, Option<Minijail>),
980 mut vm: V,
981 ramoops_region: Option<arch::pstore::RamoopsRegion>,
982 devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
983 irq_chip: &mut dyn IrqChipX86_64,
984 vcpu_ids: &mut Vec<usize>,
985 dump_device_tree_blob: Option<PathBuf>,
986 debugcon_jail: Option<Minijail>,
987 pflash_jail: Option<Minijail>,
988 fw_cfg_jail: Option<Minijail>,
989 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
990 guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
991 device_tree_overlays: Vec<DtbOverlay>,
992 _fdt_position: Option<FdtPosition>,
993 _no_pmu: bool,
994 ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
995 where
996 V: VmX86_64,
997 Vcpu: VcpuX86_64,
998 {
999 let mem = vm.get_memory().clone();
1000
1001 let vcpu_count = components.vcpu_count;
1002
1003 vm.set_identity_map_addr(identity_map_addr_start())
1004 .map_err(Error::SetIdentityMapAddr)?;
1005
1006 vm.set_tss_addr(tss_addr_start())
1007 .map_err(Error::SetTssAddr)?;
1008
1009 // Use IRQ info in ACPI if provided by the user.
1010 let mut mptable = true;
1011 let mut sci_irq = X86_64_SCI_IRQ;
1012
1013 // Carve the pcie config mmio region out of the pci low mmio range so that it can't be
1014 // allocated to any device.
1015 let pcie_cfg_mmio_range = arch_memory_layout.pcie_cfg_mmio;
1016 system_allocator
1017 .reserve_mmio(pcie_cfg_mmio_range)
1018 .map_err(Error::ReservePcieCfgMmio)?;
1019
1020 for sdt in components.acpi_sdts.iter() {
1021 if sdt.is_signature(b"FACP") {
1022 mptable = false;
1023 let sci_irq_fadt: u16 = sdt.read(acpi::FADT_FIELD_SCI_INTERRUPT);
1024 sci_irq = sci_irq_fadt.into();
1025 if !system_allocator.reserve_irq(sci_irq) {
1026 warn!("sci irq {} already reserved.", sci_irq);
1027 }
1028 }
1029 }
1030
1031 let pcie_vcfg_range = Self::get_pcie_vcfg_mmio_range(&mem, &pcie_cfg_mmio_range);
1032 let mmio_bus = Arc::new(Bus::new(BusType::Mmio));
1033 let io_bus = Arc::new(Bus::new(BusType::Io));
1034
1035 let (pci_devices, _devs): (Vec<_>, Vec<_>) = devs
1036 .into_iter()
1037 .partition(|(dev, _)| dev.as_pci_device().is_some());
1038
1039 let pci_devices = pci_devices
1040 .into_iter()
1041 .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
1042 .collect();
1043
1044 let (pci, pci_irqs, pid_debug_label_map, amls, gpe_scope_amls) = arch::generate_pci_root(
1045 pci_devices,
1046 irq_chip.as_irq_chip_mut(),
1047 mmio_bus.clone(),
1048 GuestAddress(pcie_cfg_mmio_range.start),
1049 12,
1050 io_bus.clone(),
1051 system_allocator,
1052 &mut vm,
1053 4, // Share the four pin interrupts (INTx#)
1054 Some(pcie_vcfg_range.start),
1055 #[cfg(feature = "swap")]
1056 swap_controller,
1057 )
1058 .map_err(Error::CreatePciRoot)?;
1059
1060 let pci = Arc::new(Mutex::new(pci));
1061 pci.lock().enable_pcie_cfg_mmio(pcie_cfg_mmio_range.start);
1062 let pci_cfg = PciConfigIo::new(
1063 pci.clone(),
1064 components.break_linux_pci_config_io,
1065 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
1066 );
1067 let pci_bus = Arc::new(Mutex::new(pci_cfg));
1068 io_bus.insert(pci_bus, 0xcf8, 0x8).unwrap();
1069
1070 let pcie_cfg_mmio = Arc::new(Mutex::new(PciConfigMmio::new(pci.clone(), 12)));
1071 let pcie_cfg_mmio_len = pcie_cfg_mmio_range.len().unwrap();
1072 mmio_bus
1073 .insert(pcie_cfg_mmio, pcie_cfg_mmio_range.start, pcie_cfg_mmio_len)
1074 .unwrap();
1075
1076 let pcie_vcfg_mmio = Arc::new(Mutex::new(PciVirtualConfigMmio::new(pci.clone(), 13)));
1077 mmio_bus
1078 .insert(
1079 pcie_vcfg_mmio,
1080 pcie_vcfg_range.start,
1081 pcie_vcfg_range.len().unwrap(),
1082 )
1083 .unwrap();
1084
1085 // Event used to notify crosvm that guest OS is trying to suspend.
1086 let (suspend_tube_send, suspend_tube_recv) =
1087 Tube::directional_pair().map_err(Error::CreateTube)?;
1088 let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));
1089
1090 if components.fw_cfg_enable {
1091 Self::setup_fw_cfg_device(
1092 &io_bus,
1093 components.fw_cfg_parameters.clone(),
1094 components.bootorder_fw_cfg_blob.clone(),
1095 fw_cfg_jail,
1096 #[cfg(feature = "swap")]
1097 swap_controller,
1098 )?;
1099 }
1100
1101 if !components.no_i8042 {
1102 Self::setup_legacy_i8042_device(
1103 &io_bus,
1104 irq_chip.pit_uses_speaker_port(),
1105 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
1106 )?;
1107 }
1108 let mut vm_request_tube = if !components.no_rtc {
1109 let (host_tube, device_tube) = Tube::pair()
1110 .context("create tube")
1111 .map_err(Error::SetupCmos)?;
1112 Self::setup_legacy_cmos_device(
1113 arch_memory_layout,
1114 &io_bus,
1115 irq_chip,
1116 device_tube,
1117 components.memory_size,
1118 )
1119 .map_err(Error::SetupCmos)?;
1120 Some(host_tube)
1121 } else {
1122 None
1123 };
1124 let serial_devices = Self::setup_serial_devices(
1125 components.hv_cfg.protection_type,
1126 irq_chip.as_irq_chip_mut(),
1127 &io_bus,
1128 serial_parameters,
1129 serial_jail,
1130 #[cfg(feature = "swap")]
1131 swap_controller,
1132 )?;
1133 Self::setup_debugcon_devices(
1134 components.hv_cfg.protection_type,
1135 &io_bus,
1136 serial_parameters,
1137 debugcon_jail,
1138 #[cfg(feature = "swap")]
1139 swap_controller,
1140 )?;
1141
1142 let bios_size = if let VmImage::Bios(ref bios) = components.vm_image {
1143 bios.metadata().map_err(Error::LoadBios)?.len()
1144 } else {
1145 0
1146 };
1147 if let Some(pflash_image) = components.pflash_image {
1148 Self::setup_pflash(
1149 pflash_image,
1150 components.pflash_block_size,
1151 bios_size,
1152 &mmio_bus,
1153 pflash_jail,
1154 #[cfg(feature = "swap")]
1155 swap_controller,
1156 )?;
1157 }
1158
1159 // Functions that use/create jails MUST be used before the call to
1160 // setup_acpi_devices below, as this moves us into a multiprocessing state
1161 // from which we can no longer fork.
1162
1163 let mut resume_notify_devices = Vec::new();
1164
1165 // each bus occupies 1 MB of mmio for pcie enhanced configuration
1166 let max_bus = (pcie_cfg_mmio_len / 0x100000 - 1) as u8;
1167 let (mut acpi_dev_resource, bat_control) = Self::setup_acpi_devices(
1168 arch_memory_layout,
1169 pci.clone(),
1170 &mem,
1171 &io_bus,
1172 system_allocator,
1173 suspend_tube_send.clone(),
1174 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
1175 components.acpi_sdts,
1176 irq_chip.as_irq_chip_mut(),
1177 sci_irq,
1178 battery,
1179 &mmio_bus,
1180 max_bus,
1181 &mut resume_notify_devices,
1182 #[cfg(feature = "swap")]
1183 swap_controller,
1184 #[cfg(any(target_os = "android", target_os = "linux"))]
1185 components.ac_adapter,
1186 guest_suspended_cvar,
1187 &pci_irqs,
1188 )?;
1189
1190 // Create customized SSDT table
1191 let sdt = acpi::create_customize_ssdt(pci.clone(), amls, gpe_scope_amls);
1192 if let Some(sdt) = sdt {
1193 acpi_dev_resource.sdts.push(sdt);
1194 }
1195
1196 irq_chip
1197 .finalize_devices(system_allocator, &io_bus, &mmio_bus)
1198 .map_err(Error::RegisterIrqfd)?;
1199
1200 // All of these BIOS-generated tables are set manually for the benefit of the kernel boot
1201 // flow (since there's no BIOS to set them) and for the BIOS boot flow since crosvm doesn't
1202 // have a way to pass the BIOS these configs.
1203 // This works right now because the only guest BIOS used with crosvm (u-boot) ignores these
1204 // tables and the guest OS picks them up.
1205 // If another guest does need a way to pass these tables down to its BIOS, this approach
1206 // should be rethought.
1207
1208 // Make sure the `vcpu_count` casts below and the arithmetic in `setup_mptable` are well
1209 // defined.
1210 if vcpu_count >= u8::MAX.into() {
1211 return Err(Error::TooManyVcpus);
1212 }
1213
1214 if mptable {
1215 // Note that this puts the mptable at 0x9FC00 in guest physical memory.
1216 mptable::setup_mptable(&mem, vcpu_count as u8, &pci_irqs)
1217 .map_err(Error::SetupMptable)?;
1218 }
1219 smbios::setup_smbios(&mem, &components.smbios, bios_size).map_err(Error::SetupSmbios)?;
1220
1221 let host_cpus = if components.host_cpu_topology {
1222 components.vcpu_affinity.clone()
1223 } else {
1224 None
1225 };
1226
1227 // TODO (tjeznach) Write RSDP to bootconfig before writing to memory
1228 acpi::create_acpi_tables(
1229 &mem,
1230 vcpu_count as u8,
1231 sci_irq,
1232 0xcf9,
1233 6, // RST_CPU|SYS_RST
1234 &acpi_dev_resource,
1235 host_cpus,
1236 vcpu_ids,
1237 &pci_irqs,
1238 pcie_cfg_mmio_range.start,
1239 max_bus,
1240 components.force_s2idle,
1241 )
1242 .ok_or(Error::CreateAcpi)?;
1243
1244 let mut cmdline = Self::get_base_linux_cmdline();
1245
1246 get_serial_cmdline(&mut cmdline, serial_parameters, "io", &serial_devices)
1247 .map_err(Error::GetSerialCmdline)?;
1248
1249 for param in components.extra_kernel_params {
1250 cmdline.insert_str(&param).map_err(Error::Cmdline)?;
1251 }
1252
1253 if let Some(ramoops_region) = ramoops_region {
1254 arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
1255 .map_err(Error::Cmdline)?;
1256 }
1257
1258 let pci_start = arch_memory_layout.pci_mmio_before_32bit.start;
1259
1260 let mut vcpu_init = vec![VcpuInitX86_64::default(); vcpu_count];
1261 let mut msrs = BTreeMap::new();
1262
1263 let protection_type = components.hv_cfg.protection_type;
1264
1265 match components.vm_image {
1266 VmImage::Bios(ref mut bios) => {
1267 if protection_type.runs_firmware() {
1268 return Err(Error::PvmFwBiosUnsupported);
1269 }
1270
1271 // Allow a bios to hardcode CMDLINE_OFFSET and read the kernel command line from it.
1272 Self::load_cmdline(
1273 &mem,
1274 GuestAddress(CMDLINE_OFFSET),
1275 cmdline,
1276 CMDLINE_MAX_SIZE as usize - 1,
1277 )?;
1278 Self::load_bios(&mem, bios)?;
1279 regs::set_default_msrs(&mut msrs);
1280 // The default values for `Regs` and `Sregs` already set up the reset vector.
1281 }
1282 VmImage::Kernel(ref mut kernel_image) => {
1283 let (params, kernel_region, kernel_entry, cpu_mode, kernel_type) =
1284 Self::load_kernel(&mem, kernel_image)?;
1285
1286 info!("Loaded {} kernel", kernel_type);
1287
1288 Self::setup_system_memory(
1289 arch_memory_layout,
1290 &mem,
1291 cmdline,
1292 components.initrd_image,
1293 components.android_fstab,
1294 kernel_region,
1295 params,
1296 dump_device_tree_blob,
1297 device_tree_overlays,
1298 protection_type,
1299 )?;
1300
1301 if protection_type.needs_firmware_loaded() {
1302 arch::load_image(
1303 &mem,
1304 &mut components
1305 .pvm_fw
1306 .expect("pvmfw must be available if ProtectionType loads it"),
1307 GuestAddress(PROTECTED_VM_FW_START),
1308 PROTECTED_VM_FW_MAX_SIZE,
1309 )
1310 .map_err(Error::LoadCustomPvmFw)?;
1311 } else if protection_type.runs_firmware() {
1312 // Tell the hypervisor to load the pVM firmware.
1313 vm.load_protected_vm_firmware(
1314 GuestAddress(PROTECTED_VM_FW_START),
1315 PROTECTED_VM_FW_MAX_SIZE,
1316 )
1317 .map_err(Error::LoadPvmFw)?;
1318 }
1319
1320 let entry_addr = if protection_type.needs_firmware_loaded() {
1321 Some(PROTECTED_VM_FW_START)
1322 } else if protection_type.runs_firmware() {
1323 None // Initial RIP value is set by the hypervisor
1324 } else {
1325 Some(kernel_entry.offset())
1326 };
1327
1328 if let Some(entry) = entry_addr {
1329 vcpu_init[0].regs.rip = entry;
1330 }
1331
1332 match kernel_type {
1333 KernelType::BzImage | KernelType::Elf => {
1334 // Configure the bootstrap VCPU for the Linux/x86 boot protocol.
1335 // <https://www.kernel.org/doc/html/latest/x86/boot.html>
1336 vcpu_init[0].regs.rsp = BOOT_STACK_POINTER;
1337 vcpu_init[0].regs.rsi = ZERO_PAGE_OFFSET;
1338 }
1339 KernelType::Multiboot => {
1340 // Provide Multiboot-compatible bootloader information.
1341 vcpu_init[0].regs.rax = MULTIBOOT_BOOTLOADER_MAGIC.into();
1342 vcpu_init[0].regs.rbx = MULTIBOOT_INFO_OFFSET;
1343 }
1344 }
1345
1346 if protection_type.runs_firmware() {
1347 // Pass pVM payload entry address to pVM firmware.
1348 // NOTE: this is only for development purposes. An actual pvmfw
1349 // implementation should not use this value and should instead receive
1350 // the pVM payload start and size info from crosvm as the DTB properties
1351 // /config/kernel-address and /config/kernel-size and determine the offset
1352 // of the entry point on its own, not trust crosvm to provide it.
1353 vcpu_init[0].regs.rdi = kernel_entry.offset();
1354 }
1355
1356 match cpu_mode {
1357 CpuMode::LongMode => {
1358 regs::set_long_mode_msrs(&mut msrs);
1359
1360 // Set up long mode and enable paging.
1361 regs::configure_segments_and_sregs(&mem, &mut vcpu_init[0].sregs)
1362 .map_err(Error::ConfigureSegments)?;
1363 regs::setup_page_tables(&mem, &mut vcpu_init[0].sregs)
1364 .map_err(Error::SetupPageTables)?;
1365 }
1366 CpuMode::FlatProtectedMode => {
1367 regs::set_default_msrs(&mut msrs);
1368
1369 // Set up 32-bit protected mode with paging disabled.
1370 regs::configure_segments_and_sregs_flat32(&mem, &mut vcpu_init[0].sregs)
1371 .map_err(Error::ConfigureSegments)?;
1372 }
1373 }
1374
1375 regs::set_mtrr_msrs(&mut msrs, &vm, pci_start);
1376 }
1377 }
1378
1379 // Initialize MSRs for all VCPUs.
1380 for vcpu in vcpu_init.iter_mut() {
1381 vcpu.msrs = msrs.clone();
1382 }
1383
1384 let mut vm_request_tubes = Vec::new();
1385 if let Some(req_tube) = vm_request_tube.take() {
1386 vm_request_tubes.push(req_tube);
1387 }
1388
1389 Ok(RunnableLinuxVm {
1390 vm,
1391 vcpu_count,
1392 vcpus: None,
1393 vcpu_affinity: components.vcpu_affinity,
1394 vcpu_init,
1395 no_smt: components.no_smt,
1396 irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
1397 io_bus,
1398 mmio_bus,
1399 pid_debug_label_map,
1400 suspend_tube: (suspend_tube_send, suspend_tube_recv),
1401 resume_notify_devices,
1402 rt_cpus: components.rt_cpus,
1403 delay_rt: components.delay_rt,
1404 bat_control,
1405 pm: Some(acpi_dev_resource.pm),
1406 root_config: pci,
1407 #[cfg(any(target_os = "android", target_os = "linux"))]
1408 platform_devices: Vec::new(),
1409 hotplug_bus: BTreeMap::new(),
1410 devices_thread: None,
1411 vm_request_tubes,
1412 })
1413 }
1414
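/// Configures a single vCPU: CPUID (unless the hypervisor performs early CPUID init), general
/// purpose registers, segment/special registers, FPU state, MSRs (skipping MTRR entries when the
/// vCPU doesn't support enough variable MTRRs), and local APIC LINT setup.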
1415 fn configure_vcpu<V: Vm>(
1416 vm: &V,
1417 hypervisor: &dyn HypervisorX86_64,
1418 irq_chip: &mut dyn IrqChipX86_64,
1419 vcpu: &mut dyn VcpuX86_64,
1420 vcpu_init: VcpuInitX86_64,
1421 vcpu_id: usize,
1422 num_cpus: usize,
1423 cpu_config: Option<CpuConfigX86_64>,
1424 ) -> Result<()> {
1425 let cpu_config = match cpu_config {
1426 Some(config) => config,
1427 None => return Err(Error::InvalidCpuConfig),
1428 };
1429 if !vm.check_capability(VmCap::EarlyInitCpuid) {
1430 cpuid::setup_cpuid(hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, cpu_config)
1431 .map_err(Error::SetupCpuid)?;
1432 }
1433
1434 vcpu.set_regs(&vcpu_init.regs).map_err(Error::WriteRegs)?;
1435
1436 vcpu.set_sregs(&vcpu_init.sregs)
1437 .map_err(Error::SetupSregs)?;
1438
1439 vcpu.set_fpu(&vcpu_init.fpu).map_err(Error::SetupFpu)?;
1440
1441 let vcpu_supported_var_mtrrs = regs::vcpu_supported_variable_mtrrs(vcpu);
1442 let num_var_mtrrs = regs::count_variable_mtrrs(&vcpu_init.msrs);
1443 let skip_mtrr_msrs = if num_var_mtrrs > vcpu_supported_var_mtrrs {
1444 warn!(
1445 "Too many variable MTRR entries ({} required, {} supported),
1446 please check pci_start addr, guest with pass through device may be very slow",
1447 num_var_mtrrs, vcpu_supported_var_mtrrs,
1448 );
1449 // Filter out the MTRR entries from the MSR list.
1450 true
1451 } else {
1452 false
1453 };
1454
1455 for (msr_index, value) in vcpu_init.msrs.into_iter() {
1456 if skip_mtrr_msrs && regs::is_mtrr_msr(msr_index) {
1457 continue;
1458 }
1459
1460 vcpu.set_msr(msr_index, value).map_err(Error::SetupMsrs)?;
1461 }
1462
1463 interrupts::set_lint(vcpu_id, irq_chip).map_err(Error::SetLint)?;
1464
1465 Ok(())
1466 }
1467
1468 fn register_pci_device<V: VmX86_64, Vcpu: VcpuX86_64>(
1469 linux: &mut RunnableLinuxVm<V, Vcpu>,
1470 device: Box<dyn PciDevice>,
1471 #[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>,
1472 resources: &mut SystemAllocator,
1473 hp_control_tube: &mpsc::Sender<PciRootCommand>,
1474 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
1475 ) -> Result<PciAddress> {
1476 arch::configure_pci_device(
1477 linux,
1478 device,
1479 #[cfg(any(target_os = "android", target_os = "linux"))]
1480 minijail,
1481 resources,
1482 hp_control_tube,
1483 #[cfg(feature = "swap")]
1484 swap_controller,
1485 )
1486 .map_err(Error::ConfigurePciDevice)
1487 }
1488
1489 fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
1490 Ok(BTreeMap::new())
1491 }
1492
1493 fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
1494 Ok(BTreeMap::new())
1495 }
1496
1497 fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
1498 Ok(BTreeMap::new())
1499 }
1500
1501 fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
1502 Ok(Vec::new())
1503 }
1504 }
1505
1506 // OSC returned status register in CDW1
1507 const OSC_STATUS_UNSUPPORT_UUID: u32 = 0x4;
1508 // pci host bridge OSC returned control register in CDW3
1509 #[allow(dead_code)]
1510 const PCI_HB_OSC_CONTROL_PCIE_HP: u32 = 0x1;
1511 const PCI_HB_OSC_CONTROL_SHPC_HP: u32 = 0x2;
1512 #[allow(dead_code)]
1513 const PCI_HB_OSC_CONTROL_PCIE_PME: u32 = 0x4;
1514 const PCI_HB_OSC_CONTROL_PCIE_AER: u32 = 0x8;
1515 #[allow(dead_code)]
1516 const PCI_HB_OSC_CONTROL_PCIE_CAP: u32 = 0x10;
1517
1518 struct PciRootOSC {}
1519
1520 // Method (_OSC, 4, NotSerialized) // _OSC: Operating System Capabilities
1521 // {
1522 // CreateDWordField (Arg3, Zero, CDW1) // flag and return value
1523 // If (Arg0 == ToUUID ("33db4d5b-1ff7-401c-9657-7441c03dd766"))
1524 // {
1525 // CreateDWordField (Arg3, 8, CDW3) // control field
1526 // if ( 0 == (CDW1 & 0x01)) // Query flag ?
1527 // {
1528 // CDW3 &= !(SHPC_HP | AER)
1529 // }
1530 // } Else {
1531 // CDW1 |= UNSUPPORT_UUID
1532 // }
1533 // Return (Arg3)
1534 // }
1535 impl Aml for PciRootOSC {
1536 fn to_aml_bytes(&self, aml: &mut Vec<u8>) {
1537 let osc_uuid = "33DB4D5B-1FF7-401C-9657-7441C03DD766";
1538 // virtual pcie root port supports hotplug, pme, and pcie cap register, clear all
1539 // the other bits.
1540 let mask = !(PCI_HB_OSC_CONTROL_SHPC_HP | PCI_HB_OSC_CONTROL_PCIE_AER);
1541 aml::Method::new(
1542 "_OSC".into(),
1543 4,
1544 false,
1545 vec![
1546 &aml::CreateDWordField::new(
1547 &aml::Name::new_field_name("CDW1"),
1548 &aml::Arg(3),
1549 &aml::ZERO,
1550 ),
1551 &aml::If::new(
1552 &aml::Equal::new(&aml::Arg(0), &aml::Uuid::new(osc_uuid)),
1553 vec![
1554 &aml::CreateDWordField::new(
1555 &aml::Name::new_field_name("CDW3"),
1556 &aml::Arg(3),
1557 &(8_u8),
1558 ),
1559 &aml::If::new(
1560 &aml::Equal::new(
1561 &aml::ZERO,
1562 &aml::And::new(
1563 &aml::ZERO,
1564 &aml::Name::new_field_name("CDW1"),
1565 &aml::ONE,
1566 ),
1567 ),
1568 vec![&aml::And::new(
1569 &aml::Name::new_field_name("CDW3"),
1570 &mask,
1571 &aml::Name::new_field_name("CDW3"),
1572 )],
1573 ),
1574 ],
1575 ),
1576 &aml::Else::new(vec![&aml::Or::new(
1577 &aml::Name::new_field_name("CDW1"),
1578 &OSC_STATUS_UNSUPPORT_UUID,
1579 &aml::Name::new_field_name("CDW1"),
1580 )]),
1581 &aml::Return::new(&aml::Arg(3)),
1582 ],
1583 )
1584 .to_aml_bytes(aml)
1585 }
1586 }
1587
1588 pub enum CpuMode {
1589 /// 32-bit protected mode with paging disabled.
1590 FlatProtectedMode,
1591
1592 /// 64-bit long mode.
1593 LongMode,
1594 }
1595
1596 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
1597 pub enum KernelType {
1598 BzImage,
1599 Elf,
1600 Multiboot,
1601 }
1602
1603 impl fmt::Display for KernelType {
1604 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1605 match self {
1606 KernelType::BzImage => write!(f, "bzImage"),
1607 KernelType::Elf => write!(f, "ELF"),
1608 KernelType::Multiboot => write!(f, "Multiboot"),
1609 }
1610 }
1611 }
1612
1613 impl X8664arch {
1614 /// Loads the bios from an open file.
1615 ///
1616 /// # Arguments
1617 ///
1618 /// * `mem` - The memory to be used by the guest.
1619 /// * `bios_image` - the File object for the specified bios
1620 fn load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()> {
1621 let bios_image_length = bios_image.get_len().map_err(Error::LoadBios)?;
1622 if bios_image_length >= FIRST_ADDR_PAST_32BITS {
1623 return Err(Error::LoadBios(io::Error::new(
1624 io::ErrorKind::InvalidData,
1625 format!(
1626 "bios was {} bytes, expected less than {}",
1627 bios_image_length, FIRST_ADDR_PAST_32BITS,
1628 ),
1629 )));
1630 }
1631
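        // The image is mapped so that it ends exactly at 4 GiB; the x86 reset vector
        // (0xFFFF_FFF0) therefore falls inside the BIOS image.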
1632 let guest_slice = mem
1633 .get_slice_at_addr(bios_start(bios_image_length), bios_image_length as usize)
1634 .map_err(Error::SetupGuestMemory)?;
1635 bios_image
1636 .read_exact_at_volatile(guest_slice, 0)
1637 .map_err(Error::LoadBios)?;
1638 Ok(())
1639 }
1640
    fn setup_pflash(
1642 pflash_image: File,
1643 block_size: u32,
1644 bios_size: u64,
1645 mmio_bus: &Bus,
1646 jail: Option<Minijail>,
1647 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
1648 ) -> Result<()> {
1649 let size = pflash_image.metadata().map_err(Error::LoadPflash)?.len();
1650 let start = FIRST_ADDR_PAST_32BITS - bios_size - size;
1651 let pflash_image = Box::new(pflash_image);
1652
1653 #[cfg(any(target_os = "android", target_os = "linux"))]
1654 let fds = pflash_image.as_raw_descriptors();
1655
1656 let pflash = Pflash::new(pflash_image, block_size).map_err(Error::SetupPflash)?;
1657 let pflash: Arc<Mutex<dyn BusDevice>> = match jail {
1658 #[cfg(any(target_os = "android", target_os = "linux"))]
1659 Some(jail) => Arc::new(Mutex::new(
1660 ProxyDevice::new(
1661 pflash,
1662 jail,
1663 fds,
1664 #[cfg(feature = "swap")]
1665 swap_controller,
1666 )
1667 .map_err(Error::CreateProxyDevice)?,
1668 )),
1669 #[cfg(windows)]
1670 Some(_) => unreachable!(),
1671 None => Arc::new(Mutex::new(pflash)),
1672 };
1673 mmio_bus
1674 .insert(pflash, start, size)
1675 .map_err(Error::InsertBus)?;
1676
1677 Ok(())
1678 }
1679
1680 /// Writes the command line string to the given memory slice.
1681 ///
1682 /// # Arguments
1683 ///
1684 /// * `guest_mem` - A u8 slice that will be partially overwritten by the command line.
1685 /// * `guest_addr` - The address in `guest_mem` at which to load the command line.
1686 /// * `cmdline` - The kernel command line.
1687 /// * `kernel_max_cmdline_len` - The maximum command line length (without NUL terminator)
1688 /// supported by the kernel.
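    ///
    /// A minimal usage sketch (hedged; `guest_mem` and the 255-byte limit below are
    /// assumptions for illustration):
    ///
    /// ```ignore
    /// let mut cmdline = kernel_cmdline::Cmdline::new();
    /// cmdline.insert_str("console=ttyS0 panic=-1").unwrap();
    /// // Write the NUL-terminated command line at the conventional offset.
    /// X8664arch::load_cmdline(&guest_mem, GuestAddress(CMDLINE_OFFSET), cmdline, 255)?;
    /// ```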
    fn load_cmdline(
1690 guest_mem: &GuestMemory,
1691 guest_addr: GuestAddress,
1692 cmdline: kernel_cmdline::Cmdline,
1693 kernel_max_cmdline_len: usize,
1694 ) -> Result<()> {
1695 let mut cmdline_guest_mem_slice = guest_mem
1696 .get_slice_at_addr(guest_addr, CMDLINE_MAX_SIZE as usize)
1697 .map_err(|_| Error::CommandLineOverflow)?;
1698
1699 let mut cmdline_bytes: Vec<u8> = cmdline
1700 .into_bytes_with_max_len(kernel_max_cmdline_len)
1701 .map_err(Error::Cmdline)?;
1702 cmdline_bytes.push(0u8); // Add NUL terminator.
1703
1704 cmdline_guest_mem_slice
1705 .write_all(&cmdline_bytes)
1706 .map_err(|_| Error::CommandLineOverflow)?;
1707
1708 Ok(())
1709 }
1710
1711 /// Loads the kernel from an open file.
1712 ///
1713 /// # Arguments
1714 ///
1715 /// * `mem` - The memory to be used by the guest.
1716 /// * `kernel_image` - the File object for the specified kernel.
1717 ///
1718 /// # Returns
1719 ///
1720 /// On success, returns the Linux x86_64 boot protocol parameters, the address range containing
1721 /// the kernel, the entry point (initial `RIP` value), the initial CPU mode, and the type of
1722 /// kernel.
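    ///
    /// A hedged usage sketch (`guest_mem` and `kernel_file` are assumptions for
    /// illustration):
    ///
    /// ```ignore
    /// let (params, kernel_region, entry, cpu_mode, kernel_type) =
    ///     X8664arch::load_kernel(&guest_mem, &mut kernel_file)?;
    /// // Multiboot images start in flat protected mode, ELF images in long mode, and
    /// // bzImages in whatever mode their boot protocol selects.
    /// info!("{} kernel loaded, entry point {:#x}", kernel_type, entry.offset());
    /// ```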
    fn load_kernel(
1724 mem: &GuestMemory,
1725 kernel_image: &mut File,
1726 ) -> Result<(boot_params, AddressRange, GuestAddress, CpuMode, KernelType)> {
1727 let kernel_start = GuestAddress(KERNEL_START_OFFSET);
1728
1729 let multiboot =
1730 kernel_loader::multiboot_header_from_file(kernel_image).map_err(Error::LoadKernel)?;
1731
1732 if let Some(multiboot_load) = multiboot.as_ref().and_then(|m| m.load.as_ref()) {
1733 let loaded_kernel = kernel_loader::load_multiboot(mem, kernel_image, multiboot_load)
1734 .map_err(Error::LoadKernel)?;
1735
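            // Multiboot images don't carry a `boot_params` structure, so synthesize a default
            // one with only the maximum command line size filled in.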
1736 let boot_params = boot_params {
1737 hdr: setup_header {
1738 cmdline_size: CMDLINE_MAX_SIZE as u32 - 1,
1739 ..Default::default()
1740 },
1741 ..Default::default()
1742 };
1743 return Ok((
1744 boot_params,
1745 loaded_kernel.address_range,
1746 loaded_kernel.entry,
1747 CpuMode::FlatProtectedMode,
1748 KernelType::Multiboot,
1749 ));
1750 }
1751
1752 match kernel_loader::load_elf64(mem, kernel_start, kernel_image, 0) {
1753 Ok(loaded_kernel) => {
1754 // ELF kernels don't contain a `boot_params` structure, so synthesize a default one.
1755 let boot_params = boot_params {
1756 hdr: setup_header {
1757 cmdline_size: CMDLINE_MAX_SIZE as u32 - 1,
1758 ..Default::default()
1759 },
1760 ..Default::default()
1761 };
1762 Ok((
1763 boot_params,
1764 loaded_kernel.address_range,
1765 loaded_kernel.entry,
1766 CpuMode::LongMode,
1767 KernelType::Elf,
1768 ))
1769 }
1770 Err(kernel_loader::Error::InvalidMagicNumber) => {
1771 // The image failed to parse as ELF, so try to load it as a bzImage.
1772 let (boot_params, bzimage_region, bzimage_entry, cpu_mode) =
1773 bzimage::load_bzimage(mem, kernel_start, kernel_image)
1774 .map_err(Error::LoadBzImage)?;
1775 Ok((
1776 boot_params,
1777 bzimage_region,
1778 bzimage_entry,
1779 cpu_mode,
1780 KernelType::BzImage,
1781 ))
1782 }
1783 Err(e) => Err(Error::LoadKernel(e)),
1784 }
1785 }
1786
    /// Configures the system memory space. Should be called once per VM before
    /// starting vcpu threads.
1789 ///
1790 /// # Arguments
1791 ///
1792 /// * `mem` - The memory to be used by the guest.
1793 /// * `cmdline` - the kernel commandline
1794 /// * `initrd_file` - an initial ramdisk image
    pub fn setup_system_memory(
1796 arch_memory_layout: &ArchMemoryLayout,
1797 mem: &GuestMemory,
1798 cmdline: kernel_cmdline::Cmdline,
1799 initrd_file: Option<File>,
1800 android_fstab: Option<File>,
1801 kernel_region: AddressRange,
1802 params: boot_params,
1803 dump_device_tree_blob: Option<PathBuf>,
1804 device_tree_overlays: Vec<DtbOverlay>,
1805 protection_type: ProtectionType,
1806 ) -> Result<()> {
1807 let e820_entries = generate_e820_memory_map(arch_memory_layout, mem)?;
1808
1809 let kernel_max_cmdline_len = if params.hdr.cmdline_size == 0 {
1810 // Old kernels have a maximum length of 255 bytes, not including the NUL.
1811 255
1812 } else {
1813 params.hdr.cmdline_size as usize
1814 };
1815 debug!("kernel_max_cmdline_len={kernel_max_cmdline_len}");
1816 Self::load_cmdline(
1817 mem,
1818 GuestAddress(CMDLINE_OFFSET),
1819 cmdline,
1820 kernel_max_cmdline_len,
1821 )?;
1822
1823 let initrd = match initrd_file {
1824 Some(mut initrd_file) => {
1825 let initrd_addr_max = if params.hdr.xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G != 0 {
1826 u64::MAX
1827 } else if params.hdr.initrd_addr_max == 0 {
1828 // Default initrd_addr_max for old kernels (see Documentation/x86/boot.txt).
1829 0x37FFFFFF
1830 } else {
1831 u64::from(params.hdr.initrd_addr_max)
1832 };
1833
1834 let (initrd_start, initrd_size) = arch::load_image_high(
1835 mem,
1836 &mut initrd_file,
1837 GuestAddress(kernel_region.end + 1),
1838 GuestAddress(initrd_addr_max),
1839 Some(|region| {
1840 region.options.purpose != MemoryRegionPurpose::ProtectedFirmwareRegion
1841 }),
1842 base::pagesize() as u64,
1843 )
1844 .map_err(Error::LoadInitrd)?;
1845 Some((initrd_start, initrd_size))
1846 }
1847 None => None,
1848 };
1849
1850 let mut setup_data = Vec::<SetupData>::new();
1851 if android_fstab.is_some()
1852 || !device_tree_overlays.is_empty()
1853 || protection_type.runs_firmware()
1854 {
1855 let device_tree_blob = fdt::create_fdt(
1856 android_fstab,
1857 dump_device_tree_blob,
1858 device_tree_overlays,
1859 kernel_region,
1860 initrd,
1861 )
1862 .map_err(Error::CreateFdt)?;
1863 setup_data.push(SetupData {
1864 data: device_tree_blob,
1865 type_: SetupDataType::Dtb,
1866 });
1867 }
1868
1869 setup_data.push(setup_data_rng_seed());
1870
1871 let setup_data = write_setup_data(
1872 mem,
1873 GuestAddress(SETUP_DATA_START),
1874 GuestAddress(SETUP_DATA_END),
1875 &setup_data,
1876 )?;
1877
1878 configure_boot_params(
1879 mem,
1880 GuestAddress(CMDLINE_OFFSET),
1881 setup_data,
1882 initrd,
1883 params,
1884 &e820_entries,
1885 )?;
1886
1887 configure_multiboot_info(mem, GuestAddress(CMDLINE_OFFSET), &e820_entries)?;
1888
1889 Ok(())
1890 }
1891
    fn get_pcie_vcfg_mmio_range(mem: &GuestMemory, pcie_cfg_mmio: &AddressRange) -> AddressRange {
        // Put the PCIe VCFG region at a 2 MiB boundary after physical memory or at 4 GiB,
        // whichever is greater.
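        // For example, if guest RAM ends at 6 GiB the region starts at 6 GiB; if RAM ends
        // at 1 GiB, the 4 GiB floor wins.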
1895 let ram_end_round_2mb = mem.end_addr().offset().next_multiple_of(2 * MB);
1896 let start = std::cmp::max(ram_end_round_2mb, 4 * GB);
        // Each PCI device's ECAM size is 4 KiB and its vcfg size is 8 KiB.
1898 let end = start + pcie_cfg_mmio.len().unwrap() * 2 - 1;
1899 AddressRange { start, end }
1900 }
1901
    /// Returns the high MMIO range.
    fn get_high_mmio_range<V: Vm>(vm: &V, arch_memory_layout: &ArchMemoryLayout) -> AddressRange {
1904 let mem = vm.get_memory();
1905 let start = Self::get_pcie_vcfg_mmio_range(mem, &arch_memory_layout.pcie_cfg_mmio).end + 1;
1906
1907 let phys_mem_end = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
1908 let high_mmio_end = std::cmp::min(phys_mem_end, HIGH_MMIO_MAX_END);
1909
1910 AddressRange {
1911 start,
1912 end: high_mmio_end,
1913 }
1914 }
1915
    /// This returns a minimal kernel command line for this architecture.
    pub fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
1918 let mut cmdline = kernel_cmdline::Cmdline::new();
1919 cmdline.insert_str("panic=-1").unwrap();
1920
1921 cmdline
1922 }
1923
1924 /// Sets up fw_cfg device.
1925 /// # Arguments
1926 ///
1927 /// * `io_bus` - the IO bus object
    /// * `fw_cfg_parameters` - command-line-specified data to add to the device. May contain
    ///   all `None` fields if the user did not specify any data to add.
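    ///
    /// A hedged sketch of a minimal, unjailed setup without the `swap` feature (no
    /// user-supplied items; the boot order blob holds only its NUL terminator):
    ///
    /// ```ignore
    /// X8664arch::setup_fw_cfg_device(&io_bus, Vec::new(), vec![0u8], None)?;
    /// ```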
    fn setup_fw_cfg_device(
1931 io_bus: &Bus,
1932 fw_cfg_parameters: Vec<FwCfgParameters>,
1933 bootorder_fw_cfg_blob: Vec<u8>,
1934 fw_cfg_jail: Option<Minijail>,
1935 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
1936 ) -> Result<()> {
1937 let fw_cfg = match devices::FwCfgDevice::new(FW_CFG_MAX_FILE_SLOTS, fw_cfg_parameters) {
1938 Ok(mut device) => {
                // This condition is only true if the user specified at least one bootindex
                // option on the command line. If none were specified, bootorder_fw_cfg_blob
                // contains only a single NUL terminator byte.
1942 if bootorder_fw_cfg_blob.len() > 1 {
1943 // Add boot order file to the device. If the file is not present, firmware may
1944 // not be able to boot.
1945 if let Err(err) = device.add_file(
1946 "bootorder",
1947 bootorder_fw_cfg_blob,
1948 devices::FwCfgItemType::GenericItem,
1949 ) {
1950 return Err(Error::CreateFwCfgDevice(err));
1951 }
1952 }
1953 device
1954 }
1955 Err(err) => {
1956 return Err(Error::CreateFwCfgDevice(err));
1957 }
1958 };
1959
1960 let fw_cfg: Arc<Mutex<dyn BusDevice>> = match fw_cfg_jail.as_ref() {
1961 #[cfg(any(target_os = "android", target_os = "linux"))]
1962 Some(jail) => {
1963 let jail_clone = jail.try_clone().map_err(Error::CloneJail)?;
1964 #[cfg(feature = "seccomp_trace")]
1965 debug!(
1966 "seccomp_trace {{\"event\": \"minijail_clone\", \"src_jail_addr\": \"0x{:x}\", \"dst_jail_addr\": \"0x{:x}\"}}",
1967 read_jail_addr(jail),
1968 read_jail_addr(&jail_clone)
1969 );
1970 Arc::new(Mutex::new(
1971 ProxyDevice::new(
1972 fw_cfg,
1973 jail_clone,
1974 Vec::new(),
1975 #[cfg(feature = "swap")]
1976 swap_controller,
1977 )
1978 .map_err(Error::CreateProxyDevice)?,
1979 ))
1980 }
1981 #[cfg(windows)]
1982 Some(_) => unreachable!(),
1983 None => Arc::new(Mutex::new(fw_cfg)),
1984 };
1985
1986 io_bus
1987 .insert(fw_cfg, FW_CFG_BASE_PORT, FW_CFG_WIDTH)
1988 .map_err(Error::InsertBus)?;
1989
1990 Ok(())
1991 }
1992
1993 /// Sets up the legacy x86 i8042/KBD platform device
1994 ///
1995 /// # Arguments
1996 ///
1997 /// * - `io_bus` - the IO bus object
1998 /// * - `pit_uses_speaker_port` - does the PIT use port 0x61 for the PC speaker
1999 /// * - `vm_evt_wrtube` - the event object which should receive exit events
    pub fn setup_legacy_i8042_device(
2001 io_bus: &Bus,
2002 pit_uses_speaker_port: bool,
2003 vm_evt_wrtube: SendTube,
2004 ) -> Result<()> {
2005 let i8042 = Arc::new(Mutex::new(devices::I8042Device::new(
2006 vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
2007 )));
2008
2009 if pit_uses_speaker_port {
2010 io_bus.insert(i8042, 0x062, 0x3).unwrap();
2011 } else {
2012 io_bus.insert(i8042, 0x061, 0x4).unwrap();
2013 }
2014
2015 Ok(())
2016 }
2017
2018 /// Sets up the legacy x86 CMOS/RTC platform device
2019 /// # Arguments
2020 ///
2021 /// * - `io_bus` - the IO bus object
2022 /// * - `mem_size` - the size in bytes of physical ram for the guest
    pub fn setup_legacy_cmos_device(
2024 arch_memory_layout: &ArchMemoryLayout,
2025 io_bus: &Bus,
2026 irq_chip: &mut dyn IrqChipX86_64,
2027 vm_control: Tube,
2028 mem_size: u64,
2029 ) -> anyhow::Result<()> {
2030 let mem_regions = arch_memory_regions(arch_memory_layout, mem_size, None);
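        // The CMOS device reports guest RAM as a below-4 GiB amount and an above-4 GiB
        // amount, so sum the regions on each side of the 32-bit boundary separately.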
2031
2032 let mem_below_4g = mem_regions
2033 .iter()
2034 .filter(|r| r.0.offset() < FIRST_ADDR_PAST_32BITS)
2035 .map(|r| r.1)
2036 .sum();
2037
2038 let mem_above_4g = mem_regions
2039 .iter()
2040 .filter(|r| r.0.offset() >= FIRST_ADDR_PAST_32BITS)
2041 .map(|r| r.1)
2042 .sum();
2043
2044 let irq_evt = devices::IrqEdgeEvent::new().context("cmos irq")?;
2045 let cmos = devices::cmos::Cmos::new(
2046 mem_below_4g,
2047 mem_above_4g,
2048 Utc::now,
2049 vm_control,
2050 irq_evt.try_clone().context("cmos irq clone")?,
2051 )
2052 .context("create cmos")?;
2053
2054 irq_chip
2055 .register_edge_irq_event(
2056 devices::cmos::RTC_IRQ as u32,
2057 &irq_evt,
2058 IrqEventSource::from_device(&cmos),
2059 )
2060 .context("cmos register irq")?;
2061 io_bus
2062 .insert(Arc::new(Mutex::new(cmos)), 0x70, 0x2)
2063 .context("cmos insert irq")?;
2064
2065 Ok(())
2066 }
2067
    /// Sets up the ACPI devices for this platform and returns the resources used to build
    /// the ACPI tables.
2070 ///
2071 /// # Arguments
2072 ///
2073 /// * `io_bus` the I/O bus to add the devices to
2074 /// * `resources` the SystemAllocator to allocate IO and MMIO for acpi devices.
    /// * `suspend_tube` the tube object used to suspend/resume the VM.
2076 /// * `sdts` ACPI system description tables
2077 /// * `irq_chip` the IrqChip object for registering irq events
2078 /// * `battery` indicate whether to create the battery
2079 /// * `mmio_bus` the MMIO bus to add the devices to
2080 /// * `pci_irqs` IRQ assignment of PCI devices. Tuples of (PCI address, gsi, PCI interrupt pin).
2081 /// Note that this matches one of the return values of generate_pci_root.
    pub fn setup_acpi_devices(
2083 arch_memory_layout: &ArchMemoryLayout,
2084 pci_root: Arc<Mutex<PciRoot>>,
2085 mem: &GuestMemory,
2086 io_bus: &Bus,
2087 resources: &mut SystemAllocator,
2088 suspend_tube: Arc<Mutex<SendTube>>,
2089 vm_evt_wrtube: SendTube,
2090 sdts: Vec<SDT>,
2091 irq_chip: &mut dyn IrqChip,
2092 sci_irq: u32,
2093 battery: (Option<BatteryType>, Option<Minijail>),
2094 #[cfg_attr(windows, allow(unused_variables))] mmio_bus: &Bus,
2095 max_bus: u8,
2096 resume_notify_devices: &mut Vec<Arc<Mutex<dyn BusResumeDevice>>>,
2097 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
2098 #[cfg(any(target_os = "android", target_os = "linux"))] ac_adapter: bool,
2099 guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
2100 pci_irqs: &[(PciAddress, u32, PciInterruptPin)],
2101 ) -> Result<(acpi::AcpiDevResource, Option<BatControl>)> {
        // The AML data for the ACPI devices.
2103 let mut amls = Vec::new();
2104
2105 let bat_control = if let Some(battery_type) = battery.0 {
2106 match battery_type {
2107 #[cfg(any(target_os = "android", target_os = "linux"))]
2108 BatteryType::Goldfish => {
2109 let irq_num = resources.allocate_irq().ok_or(Error::CreateBatDevices(
2110 arch::DeviceRegistrationError::AllocateIrq,
2111 ))?;
2112 let (control_tube, _mmio_base) = arch::sys::linux::add_goldfish_battery(
2113 &mut amls,
2114 battery.1,
2115 mmio_bus,
2116 irq_chip,
2117 irq_num,
2118 resources,
2119 #[cfg(feature = "swap")]
2120 swap_controller,
2121 )
2122 .map_err(Error::CreateBatDevices)?;
2123 Some(BatControl {
2124 type_: BatteryType::Goldfish,
2125 control_tube,
2126 })
2127 }
2128 #[cfg(windows)]
2129 _ => None,
2130 }
2131 } else {
2132 None
2133 };
2134
2135 let pm_alloc = resources.get_anon_alloc();
2136 let pm_iobase = match resources.io_allocator() {
2137 Some(io) => io
2138 .allocate_with_align(
2139 devices::acpi::ACPIPM_RESOURCE_LEN as u64,
2140 pm_alloc,
2141 "ACPIPM".to_string(),
2142 4, // must be 32-bit aligned
2143 )
2144 .map_err(Error::AllocateIOResouce)?,
2145 None => 0x600,
2146 };
2147
2148 let pcie_vcfg = aml::Name::new(
2149 "VCFG".into(),
2150 &Self::get_pcie_vcfg_mmio_range(mem, &arch_memory_layout.pcie_cfg_mmio).start,
2151 );
2152 pcie_vcfg.to_aml_bytes(&mut amls);
2153
2154 let pm_sci_evt = devices::IrqLevelEvent::new().map_err(Error::CreateEvent)?;
2155
2156 #[cfg(any(target_os = "android", target_os = "linux"))]
2157 let acdc = if ac_adapter {
            // Allocate a GPE for AC adapter notification.
2159 let gpe = resources.allocate_gpe().ok_or(Error::AllocateGpe)?;
2160
2161 let alloc = resources.get_anon_alloc();
2162 let mmio_base = resources
2163 .allocate_mmio(
2164 devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
2165 alloc,
2166 "AcAdapter".to_string(),
2167 resources::AllocOptions::new().align(devices::ac_adapter::ACDC_VIRT_MMIO_SIZE),
2168 )
2169 .unwrap();
2170 let ac_adapter_dev = devices::ac_adapter::AcAdapter::new(mmio_base, gpe);
2171 let ac_dev = Arc::new(Mutex::new(ac_adapter_dev));
2172 mmio_bus
2173 .insert(
2174 ac_dev.clone(),
2175 mmio_base,
2176 devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
2177 )
2178 .unwrap();
2179
2180 ac_dev.lock().to_aml_bytes(&mut amls);
2181 Some(ac_dev)
2182 } else {
2183 None
2184 };
2185 #[cfg(windows)]
2186 let acdc = None;
2187
        // Virtual PMC
2189 if let Some(guest_suspended_cvar) = guest_suspended_cvar {
2190 let alloc = resources.get_anon_alloc();
2191 let mmio_base = resources
2192 .allocate_mmio(
2193 devices::pmc_virt::VPMC_VIRT_MMIO_SIZE,
2194 alloc,
2195 "VirtualPmc".to_string(),
2196 resources::AllocOptions::new().align(devices::pmc_virt::VPMC_VIRT_MMIO_SIZE),
2197 )
2198 .unwrap();
2199
2200 let pmc_virtio_mmio =
2201 Arc::new(Mutex::new(VirtualPmc::new(mmio_base, guest_suspended_cvar)));
2202 mmio_bus
2203 .insert(
2204 pmc_virtio_mmio.clone(),
2205 mmio_base,
2206 devices::pmc_virt::VPMC_VIRT_MMIO_SIZE,
2207 )
2208 .unwrap();
2209 pmc_virtio_mmio.lock().to_aml_bytes(&mut amls);
2210 }
2211
2212 let mut pmresource = devices::ACPIPMResource::new(
2213 pm_sci_evt.try_clone().map_err(Error::CloneEvent)?,
2214 suspend_tube,
2215 vm_evt_wrtube,
2216 acdc,
2217 );
2218 pmresource.to_aml_bytes(&mut amls);
2219 irq_chip
2220 .register_level_irq_event(
2221 sci_irq,
2222 &pm_sci_evt,
2223 IrqEventSource::from_device(&pmresource),
2224 )
2225 .map_err(Error::RegisterIrqfd)?;
2226 pmresource.start();
2227
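        // Build the host bridge _CRS: the bus number range, the 0xCF8 config ports, and one
        // memory descriptor per allocated MMIO pool (32-bit address fields when the pool
        // fits below 4 GiB, 64-bit otherwise).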
2228 let mut crs_entries: Vec<Box<dyn Aml>> = vec![
2229 Box::new(aml::AddressSpace::new_bus_number(0x0u16, max_bus as u16)),
2230 Box::new(aml::IO::new(0xcf8, 0xcf8, 1, 0x8)),
2231 ];
2232 for r in resources.mmio_pools() {
2233 let entry: Box<dyn Aml> = match (u32::try_from(r.start), u32::try_from(r.end)) {
2234 (Ok(start), Ok(end)) => Box::new(aml::AddressSpace::new_memory(
2235 aml::AddressSpaceCachable::NotCacheable,
2236 true,
2237 start,
2238 end,
2239 )),
2240 _ => Box::new(aml::AddressSpace::new_memory(
2241 aml::AddressSpaceCachable::NotCacheable,
2242 true,
2243 r.start,
2244 r.end,
2245 )),
2246 };
2247 crs_entries.push(entry);
2248 }
2249
2250 let prt_entries: Vec<aml::Package> = pci_irqs
2251 .iter()
2252 .map(|(pci_address, gsi, pci_intr_pin)| {
2253 aml::Package::new(vec![
2254 &pci_address.acpi_adr(),
2255 &pci_intr_pin.to_mask(),
2256 &aml::ZERO,
2257 gsi,
2258 ])
2259 })
2260 .collect();
2261
2262 aml::Device::new(
2263 "_SB_.PC00".into(),
2264 vec![
2265 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A08")),
2266 &aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A03")),
2267 &aml::Name::new("_ADR".into(), &aml::ZERO),
2268 &aml::Name::new("_SEG".into(), &aml::ZERO),
2269 &aml::Name::new("_UID".into(), &aml::ZERO),
2270 &aml::Name::new("SUPP".into(), &aml::ZERO),
2271 &aml::Name::new(
2272 "_CRS".into(),
2273 &aml::ResourceTemplate::new(crs_entries.iter().map(|b| b.as_ref()).collect()),
2274 ),
2275 &PciRootOSC {},
2276 &aml::Name::new(
2277 "_PRT".into(),
2278 &aml::Package::new(prt_entries.iter().map(|p| p as &dyn Aml).collect()),
2279 ),
2280 ],
2281 )
2282 .to_aml_bytes(&mut amls);
2283
2284 if let (Some(start), Some(len)) = (
2285 u32::try_from(arch_memory_layout.pcie_cfg_mmio.start).ok(),
2286 arch_memory_layout
2287 .pcie_cfg_mmio
2288 .len()
2289 .and_then(|l| u32::try_from(l).ok()),
2290 ) {
2291 aml::Device::new(
2292 "_SB_.MB00".into(),
2293 vec![
2294 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
2295 &aml::Name::new(
2296 "_CRS".into(),
2297 &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
2298 true, start, len,
2299 )]),
2300 ),
2301 ],
2302 )
2303 .to_aml_bytes(&mut amls);
2304 } else {
2305 warn!("Failed to create ACPI MMCFG region reservation");
2306 }
2307
2308 let root_bus = pci_root.lock().get_root_bus();
2309 let addresses = root_bus.lock().get_downstream_devices();
2310 for address in addresses {
2311 if let Some(acpi_path) = pci_root.lock().acpi_path(&address) {
2312 const DEEPEST_SLEEP_STATE: u32 = 3;
2313 aml::Device::new(
2314 (*acpi_path).into(),
2315 vec![
2316 &aml::Name::new("_ADR".into(), &address.acpi_adr()),
2317 &aml::Name::new(
2318 "_PRW".into(),
2319 &aml::Package::new(vec![&PM_WAKEUP_GPIO, &DEEPEST_SLEEP_STATE]),
2320 ),
2321 ],
2322 )
2323 .to_aml_bytes(&mut amls);
2324 }
2325 }
2326
2327 let pm = Arc::new(Mutex::new(pmresource));
2328 io_bus
2329 .insert(
2330 pm.clone(),
2331 pm_iobase,
2332 devices::acpi::ACPIPM_RESOURCE_LEN as u64,
2333 )
2334 .unwrap();
2335 resume_notify_devices.push(pm.clone());
2336
2337 Ok((
2338 acpi::AcpiDevResource {
2339 amls,
2340 pm_iobase,
2341 pm,
2342 sdts,
2343 },
2344 bat_control,
2345 ))
2346 }
2347
2348 /// Sets up the serial devices for this platform. Returns a list of configured serial devices.
2349 ///
2350 /// # Arguments
2351 ///
2352 /// * - `irq_chip` the IrqChip object for registering irq events
2353 /// * - `io_bus` the I/O bus to add the devices to
2354 /// * - `serial_parameters` - definitions for how the serial devices should be configured
    pub fn setup_serial_devices(
2356 protection_type: ProtectionType,
2357 irq_chip: &mut dyn IrqChip,
2358 io_bus: &Bus,
2359 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
2360 serial_jail: Option<Minijail>,
2361 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
2362 ) -> Result<Vec<SerialDeviceInfo>> {
2363 let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2364 let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2365
2366 let serial_devices = arch::add_serial_devices(
2367 protection_type,
2368 io_bus,
2369 (X86_64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
2370 (X86_64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
2371 serial_parameters,
2372 serial_jail,
2373 #[cfg(feature = "swap")]
2374 swap_controller,
2375 )
2376 .map_err(Error::CreateSerialDevices)?;
2377
2378 let source = IrqEventSource {
2379 device_id: Serial::device_id(),
2380 queue_id: 0,
2381 device_name: Serial::debug_label(),
2382 };
2383 irq_chip
2384 .register_edge_irq_event(X86_64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
2385 .map_err(Error::RegisterIrqfd)?;
2386 irq_chip
2387 .register_edge_irq_event(X86_64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
2388 .map_err(Error::RegisterIrqfd)?;
2389
2390 Ok(serial_devices)
2391 }
2392
    fn setup_debugcon_devices(
2394 protection_type: ProtectionType,
2395 io_bus: &Bus,
2396 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
2397 debugcon_jail: Option<Minijail>,
2398 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
2399 ) -> Result<()> {
2400 for param in serial_parameters.values() {
2401 if param.hardware != SerialHardware::Debugcon {
2402 continue;
2403 }
2404
2405 let mut preserved_fds = Vec::new();
2406 let con = param
2407 .create_serial_device::<Debugcon>(
2408 protection_type,
2409 // Debugcon doesn't use the interrupt event
2410 &Event::new().map_err(Error::CreateEvent)?,
2411 &mut preserved_fds,
2412 )
2413 .map_err(Error::CreateDebugconDevice)?;
2414
2415 let con: Arc<Mutex<dyn BusDevice>> = match debugcon_jail.as_ref() {
2416 #[cfg(any(target_os = "android", target_os = "linux"))]
2417 Some(jail) => {
2418 let jail_clone = jail.try_clone().map_err(Error::CloneJail)?;
2419 #[cfg(feature = "seccomp_trace")]
2420 debug!(
2421 "seccomp_trace {{\"event\": \"minijail_clone\", \"src_jail_addr\": \"0x{:x}\", \"dst_jail_addr\": \"0x{:x}\"}}",
2422 read_jail_addr(jail),
2423 read_jail_addr(&jail_clone)
2424 );
2425 Arc::new(Mutex::new(
2426 ProxyDevice::new(
2427 con,
2428 jail_clone,
2429 preserved_fds,
2430 #[cfg(feature = "swap")]
2431 swap_controller,
2432 )
2433 .map_err(Error::CreateProxyDevice)?,
2434 ))
2435 }
2436 #[cfg(windows)]
2437 Some(_) => unreachable!(),
2438 None => Arc::new(Mutex::new(con)),
2439 };
2440 io_bus
2441 .insert(con.clone(), param.debugcon_port.into(), 1)
2442 .map_err(Error::InsertBus)?;
2443 }
2444
2445 Ok(())
2446 }
2447 }
2448
2449 #[sorted]
2450 #[derive(Error, Debug)]
2451 pub enum MsrError {
    #[error("CPU not supported. Only Intel CPUs support ITMT.")]
2453 CpuUnSupport,
2454 #[error("msr must be unique: {0}")]
2455 MsrDuplicate(u32),
2456 }
2457
2458 #[derive(Error, Debug)]
2459 pub enum HybridSupportError {
2460 #[error("Host CPU doesn't support hybrid architecture.")]
2461 UnsupportedHostCpu,
2462 }
2463
2464 /// The wrapper for CPUID call functions.
2465 pub struct CpuIdCall {
2466 /// __cpuid_count or a fake function for test.
2467 cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
2468 /// __cpuid or a fake function for test.
2469 cpuid: unsafe fn(u32) -> CpuidResult,
2470 }
2471
2472 impl CpuIdCall {
    pub fn new(
2474 cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
2475 cpuid: unsafe fn(u32) -> CpuidResult,
2476 ) -> CpuIdCall {
2477 CpuIdCall { cpuid_count, cpuid }
2478 }
2479 }
2480
/// Checks if the host supports the hybrid CPU feature. The check includes:
/// 1. Check that CPUID.1AH exists. CPUID.1AH is the hybrid information enumeration leaf.
/// 2. Check that CPUID.07H.00H:EDX[bit 15] is set. This bit means the processor is identified
///    as a hybrid part.
/// 3. Check that CPUID.1AH:EAX is nonzero. The hybrid core type is reported in EAX.
2486 ///
2487 /// # Arguments
2488 ///
2489 /// * - `cpuid` the wrapped cpuid functions used to get CPUID info.
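///
/// A hedged usage sketch with the real CPUID intrinsics:
///
/// ```ignore
/// use std::arch::x86_64::{__cpuid, __cpuid_count};
///
/// let cpuid = CpuIdCall::new(__cpuid_count, __cpuid);
/// if check_host_hybrid_support(&cpuid).is_ok() {
///     // The host reports hybrid (heterogeneous core) topology via CPUID.1AH.
/// }
/// ```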
pub fn check_host_hybrid_support(cpuid: &CpuIdCall) -> std::result::Result<(), HybridSupportError> {
2491 // CPUID.0H.EAX returns maximum input value for basic CPUID information.
2492 //
2493 // SAFETY:
2494 // Safe because we pass 0 for this call and the host supports the
2495 // `cpuid` instruction.
2496 let mut cpuid_entry = unsafe { (cpuid.cpuid)(0x0) };
2497 if cpuid_entry.eax < 0x1A {
2498 return Err(HybridSupportError::UnsupportedHostCpu);
2499 }
2500 // SAFETY:
2501 // Safe because we pass 0x7 and 0 for this call and the host supports the
2502 // `cpuid` instruction.
2503 cpuid_entry = unsafe { (cpuid.cpuid_count)(0x7, 0) };
2504 if cpuid_entry.edx & 1 << EDX_HYBRID_CPU_SHIFT == 0 {
2505 return Err(HybridSupportError::UnsupportedHostCpu);
2506 }
2507 // From SDM, if a value entered for CPUID.EAX is less than or equal to the
2508 // maximum input value and the leaf is not supported on that processor then
2509 // 0 is returned in all the registers.
2510 // For the CPU with hybrid support, its CPUID.1AH.EAX shouldn't be zero.
2511 //
2512 // SAFETY:
2513 // Safe because we pass 0 for this call and the host supports the
2514 // `cpuid` instruction.
2515 cpuid_entry = unsafe { (cpuid.cpuid)(0x1A) };
2516 if cpuid_entry.eax == 0 {
2517 return Err(HybridSupportError::UnsupportedHostCpu);
2518 }
2519 Ok(())
2520 }
2521
2522 #[cfg(test)]
2523 mod tests {
2524 use std::mem::size_of;
2525
2526 use super::*;
2527
    fn setup() -> ArchMemoryLayout {
2529 let pci_config = PciConfig {
2530 ecam: Some(MemoryRegionConfig {
2531 start: 3 * GB,
2532 size: Some(256 * MB),
2533 }),
2534 mem: Some(MemoryRegionConfig {
2535 start: 2 * GB,
2536 size: None,
2537 }),
2538 };
2539 create_arch_memory_layout(&pci_config, false).unwrap()
2540 }
2541
2542 #[test]
    fn regions_lt_4gb_nobios() {
2544 let arch_memory_layout = setup();
2545 let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, /* bios_size */ None);
2546 assert_eq!(
2547 regions,
2548 [
2549 (
2550 GuestAddress(0),
2551 640 * KB,
2552 MemoryRegionOptions {
2553 align: 0,
2554 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2555 file_backed: None,
2556 },
2557 ),
2558 (
2559 GuestAddress(640 * KB),
2560 384 * KB,
2561 MemoryRegionOptions {
2562 align: 0,
2563 purpose: MemoryRegionPurpose::ReservedMemory,
2564 file_backed: None,
2565 },
2566 ),
2567 (
2568 GuestAddress(1 * MB),
2569 512 * MB - 1 * MB,
2570 MemoryRegionOptions {
2571 align: 0,
2572 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2573 file_backed: None,
2574 },
2575 )
2576 ]
2577 );
2578 }
2579
2580 #[test]
    fn regions_gt_4gb_nobios() {
2582 let arch_memory_layout = setup();
2583 let size = 4 * GB + 0x8000;
2584 let regions = arch_memory_regions(&arch_memory_layout, size, /* bios_size */ None);
2585 assert_eq!(
2586 regions,
2587 [
2588 (
2589 GuestAddress(0),
2590 640 * KB,
2591 MemoryRegionOptions {
2592 align: 0,
2593 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2594 file_backed: None,
2595 },
2596 ),
2597 (
2598 GuestAddress(640 * KB),
2599 384 * KB,
2600 MemoryRegionOptions {
2601 align: 0,
2602 purpose: MemoryRegionPurpose::ReservedMemory,
2603 file_backed: None,
2604 },
2605 ),
2606 (
2607 GuestAddress(1 * MB),
2608 2 * GB - 1 * MB,
2609 MemoryRegionOptions {
2610 align: 0,
2611 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2612 file_backed: None,
2613 },
2614 ),
2615 (
2616 GuestAddress(4 * GB),
2617 2 * GB + 0x8000,
2618 MemoryRegionOptions {
2619 align: 0,
2620 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2621 file_backed: None,
2622 },
2623 ),
2624 ]
2625 );
2626 }
2627
2628 #[test]
    fn regions_lt_4gb_bios() {
2630 let arch_memory_layout = setup();
2631 let bios_len = 1 * MB;
2632 let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, Some(bios_len));
2633 assert_eq!(
2634 regions,
2635 [
2636 (
2637 GuestAddress(0),
2638 640 * KB,
2639 MemoryRegionOptions {
2640 align: 0,
2641 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2642 file_backed: None,
2643 },
2644 ),
2645 (
2646 GuestAddress(640 * KB),
2647 384 * KB,
2648 MemoryRegionOptions {
2649 align: 0,
2650 purpose: MemoryRegionPurpose::ReservedMemory,
2651 file_backed: None,
2652 },
2653 ),
2654 (
2655 GuestAddress(1 * MB),
2656 512 * MB - 1 * MB,
2657 MemoryRegionOptions {
2658 align: 0,
2659 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2660 file_backed: None,
2661 },
2662 ),
2663 (
2664 GuestAddress(4 * GB - bios_len),
2665 bios_len,
2666 MemoryRegionOptions {
2667 align: 0,
2668 purpose: MemoryRegionPurpose::Bios,
2669 file_backed: None,
2670 },
2671 ),
2672 ]
2673 );
2674 }
2675
2676 #[test]
    fn regions_gt_4gb_bios() {
2678 let arch_memory_layout = setup();
2679 let bios_len = 1 * MB;
2680 let regions = arch_memory_regions(&arch_memory_layout, 4 * GB + 0x8000, Some(bios_len));
2681 assert_eq!(
2682 regions,
2683 [
2684 (
2685 GuestAddress(0),
2686 640 * KB,
2687 MemoryRegionOptions {
2688 align: 0,
2689 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2690 file_backed: None,
2691 },
2692 ),
2693 (
2694 GuestAddress(640 * KB),
2695 384 * KB,
2696 MemoryRegionOptions {
2697 align: 0,
2698 purpose: MemoryRegionPurpose::ReservedMemory,
2699 file_backed: None,
2700 },
2701 ),
2702 (
2703 GuestAddress(1 * MB),
2704 2 * GB - 1 * MB,
2705 MemoryRegionOptions {
2706 align: 0,
2707 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2708 file_backed: None,
2709 },
2710 ),
2711 (
2712 GuestAddress(4 * GB - bios_len),
2713 bios_len,
2714 MemoryRegionOptions {
2715 align: 0,
2716 purpose: MemoryRegionPurpose::Bios,
2717 file_backed: None,
2718 },
2719 ),
2720 (
2721 GuestAddress(4 * GB),
2722 2 * GB + 0x8000,
2723 MemoryRegionOptions {
2724 align: 0,
2725 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2726 file_backed: None,
2727 },
2728 ),
2729 ]
2730 );
2731 }
2732
2733 #[test]
    fn regions_eq_4gb_nobios() {
2735 let arch_memory_layout = setup();
        // Test with a RAM size that exactly fills the space below the PCI hole (2 GiB here).
2737 let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, /* bios_size */ None);
2738 assert_eq!(
2739 regions,
2740 [
2741 (
2742 GuestAddress(0),
2743 640 * KB,
2744 MemoryRegionOptions {
2745 align: 0,
2746 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2747 file_backed: None,
2748 },
2749 ),
2750 (
2751 GuestAddress(640 * KB),
2752 384 * KB,
2753 MemoryRegionOptions {
2754 align: 0,
2755 purpose: MemoryRegionPurpose::ReservedMemory,
2756 file_backed: None,
2757 },
2758 ),
2759 (
2760 GuestAddress(1 * MB),
2761 2 * GB - 1 * MB,
2762 MemoryRegionOptions {
2763 align: 0,
2764 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2765 file_backed: None,
2766 },
2767 )
2768 ]
2769 );
2770 }
2771
2772 #[test]
    fn regions_eq_4gb_bios() {
2774 let arch_memory_layout = setup();
        // Test with a RAM size that exactly fills the space below the PCI hole (2 GiB here).
2776 let bios_len = 1 * MB;
2777 let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, Some(bios_len));
2778 assert_eq!(
2779 regions,
2780 [
2781 (
2782 GuestAddress(0),
2783 640 * KB,
2784 MemoryRegionOptions {
2785 align: 0,
2786 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2787 file_backed: None,
2788 },
2789 ),
2790 (
2791 GuestAddress(640 * KB),
2792 384 * KB,
2793 MemoryRegionOptions {
2794 align: 0,
2795 purpose: MemoryRegionPurpose::ReservedMemory,
2796 file_backed: None,
2797 },
2798 ),
2799 (
2800 GuestAddress(1 * MB),
2801 2 * GB - 1 * MB,
2802 MemoryRegionOptions {
2803 align: 0,
2804 purpose: MemoryRegionPurpose::GuestMemoryRegion,
2805 file_backed: None,
2806 },
2807 ),
2808 (
2809 GuestAddress(4 * GB - bios_len),
2810 bios_len,
2811 MemoryRegionOptions {
2812 align: 0,
2813 purpose: MemoryRegionPurpose::Bios,
2814 file_backed: None,
2815 },
2816 ),
2817 ]
2818 );
2819 }
2820
2821 #[test]
    fn check_pci_mmio_layout() {
2823 let arch_memory_layout = setup();
2824
2825 assert_eq!(arch_memory_layout.pci_mmio_before_32bit.start, 2 * GB);
2826 assert_eq!(arch_memory_layout.pcie_cfg_mmio.start, 3 * GB);
2827 assert_eq!(arch_memory_layout.pcie_cfg_mmio.len().unwrap(), 256 * MB);
2828 }
2829
2830 #[test]
    fn check_32bit_gap_size_alignment() {
2832 let arch_memory_layout = setup();
2833 // pci_mmio_before_32bit is 256 MB aligned to be friendly for MTRR mappings.
2834 assert_eq!(
2835 arch_memory_layout.pci_mmio_before_32bit.start % (256 * MB),
2836 0
2837 );
2838 }
2839
2840 #[test]
    fn write_setup_data_empty() {
2842 let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();
2843 let setup_data = [];
2844 let setup_data_addr = write_setup_data(
2845 &mem,
2846 GuestAddress(0x1000),
2847 GuestAddress(0x2000),
2848 &setup_data,
2849 )
2850 .expect("write_setup_data");
2851 assert_eq!(setup_data_addr, None);
2852 }
2853
2854 #[test]
    fn write_setup_data_two_of_them() {
2856 let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();
2857
2858 let entry1_addr = GuestAddress(0x1000);
2859 let entry1_next_addr = entry1_addr;
2860 let entry1_len_addr = entry1_addr.checked_add(12).unwrap();
2861 let entry1_data_addr = entry1_addr.checked_add(16).unwrap();
2862 let entry1_data = [0x55u8; 13];
2863 let entry1_size = (size_of::<setup_data_hdr>() + entry1_data.len()) as u64;
2864 let entry1_align = 3;
2865
2866 let entry2_addr = GuestAddress(entry1_addr.offset() + entry1_size + entry1_align);
2867 let entry2_next_addr = entry2_addr;
2868 let entry2_len_addr = entry2_addr.checked_add(12).unwrap();
2869 let entry2_data_addr = entry2_addr.checked_add(16).unwrap();
2870 let entry2_data = [0xAAu8; 9];
2871
2872 let setup_data = [
2873 SetupData {
2874 data: entry1_data.to_vec(),
2875 type_: SetupDataType::Dtb,
2876 },
2877 SetupData {
2878 data: entry2_data.to_vec(),
2879 type_: SetupDataType::Dtb,
2880 },
2881 ];
2882
2883 let setup_data_head_addr = write_setup_data(
2884 &mem,
2885 GuestAddress(0x1000),
2886 GuestAddress(0x2000),
2887 &setup_data,
2888 )
2889 .expect("write_setup_data");
2890 assert_eq!(setup_data_head_addr, Some(entry1_addr));
2891
2892 assert_eq!(
2893 mem.read_obj_from_addr::<u64>(entry1_next_addr).unwrap(),
2894 entry2_addr.offset()
2895 );
2896 assert_eq!(
2897 mem.read_obj_from_addr::<u32>(entry1_len_addr).unwrap(),
2898 entry1_data.len() as u32
2899 );
2900 assert_eq!(
2901 mem.read_obj_from_addr::<[u8; 13]>(entry1_data_addr)
2902 .unwrap(),
2903 entry1_data
2904 );
2905
2906 assert_eq!(mem.read_obj_from_addr::<u64>(entry2_next_addr).unwrap(), 0);
2907 assert_eq!(
2908 mem.read_obj_from_addr::<u32>(entry2_len_addr).unwrap(),
2909 entry2_data.len() as u32
2910 );
2911 assert_eq!(
2912 mem.read_obj_from_addr::<[u8; 9]>(entry2_data_addr).unwrap(),
2913 entry2_data
2914 );
2915 }
2916
2917 #[test]
    fn cmdline_overflow() {
2919 const MEM_SIZE: u64 = 0x1000;
2920 let gm = GuestMemory::new(&[(GuestAddress(0x0), MEM_SIZE)]).unwrap();
2921 let mut cmdline = kernel_cmdline::Cmdline::new();
2922 cmdline.insert_str("12345").unwrap();
2923 let cmdline_address = GuestAddress(MEM_SIZE - 5);
2924 let err =
2925 X8664arch::load_cmdline(&gm, cmdline_address, cmdline, CMDLINE_MAX_SIZE as usize - 1)
2926 .unwrap_err();
2927 assert!(matches!(err, Error::CommandLineOverflow));
2928 }
2929
2930 #[test]
    fn cmdline_write_end() {
2932 const MEM_SIZE: u64 = 0x1000;
2933 let gm = GuestMemory::new(&[(GuestAddress(0x0), MEM_SIZE)]).unwrap();
2934 let mut cmdline = kernel_cmdline::Cmdline::new();
2935 cmdline.insert_str("1234").unwrap();
2936 let mut cmdline_address = GuestAddress(45);
2937 X8664arch::load_cmdline(&gm, cmdline_address, cmdline, CMDLINE_MAX_SIZE as usize - 1)
2938 .unwrap();
2939 let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2940 assert_eq!(val, b'1');
2941 cmdline_address = cmdline_address.unchecked_add(1);
2942 let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2943 assert_eq!(val, b'2');
2944 cmdline_address = cmdline_address.unchecked_add(1);
2945 let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2946 assert_eq!(val, b'3');
2947 cmdline_address = cmdline_address.unchecked_add(1);
2948 let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2949 assert_eq!(val, b'4');
2950 cmdline_address = cmdline_address.unchecked_add(1);
2951 let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
2952 assert_eq!(val, b'\0');
2953 }
2954 }
2955