• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 mod fdt;
6 
7 const E820_RAM: u32 = 1;
8 const SETUP_DTB: u32 = 2;
9 const X86_64_FDT_MAX_SIZE: u64 = 0x200000;
10 
11 #[allow(dead_code)]
12 #[allow(non_upper_case_globals)]
13 #[allow(non_camel_case_types)]
14 #[allow(non_snake_case)]
15 mod bootparam;
16 
17 // boot_params is just a series of ints, it is safe to initialize it.
18 unsafe impl data_model::DataInit for bootparam::boot_params {}
19 
20 #[allow(dead_code)]
21 #[allow(non_upper_case_globals)]
22 mod msr_index;
23 
24 #[allow(dead_code)]
25 #[allow(non_upper_case_globals)]
26 #[allow(non_camel_case_types)]
27 #[allow(clippy::all)]
28 mod mpspec;
29 // These mpspec types are only data, reading them from data is a safe initialization.
30 unsafe impl data_model::DataInit for mpspec::mpc_bus {}
31 unsafe impl data_model::DataInit for mpspec::mpc_cpu {}
32 unsafe impl data_model::DataInit for mpspec::mpc_intsrc {}
33 unsafe impl data_model::DataInit for mpspec::mpc_ioapic {}
34 unsafe impl data_model::DataInit for mpspec::mpc_table {}
35 unsafe impl data_model::DataInit for mpspec::mpc_lintsrc {}
36 unsafe impl data_model::DataInit for mpspec::mpf_intel {}
37 
38 mod acpi;
39 mod bzimage;
40 mod cpuid;
41 mod gdt;
42 mod interrupts;
43 mod mptable;
44 mod regs;
45 mod smbios;
46 
47 use std::collections::BTreeMap;
48 use std::error::Error as StdError;
49 use std::ffi::{CStr, CString};
50 use std::fmt::{self, Display};
51 use std::fs::File;
52 use std::io::{self, Seek};
53 use std::mem;
54 use std::sync::Arc;
55 
56 use crate::bootparam::boot_params;
57 use acpi_tables::aml::Aml;
58 use acpi_tables::sdt::SDT;
59 use arch::{
60     get_serial_cmdline, GetSerialCmdlineError, RunnableLinuxVm, SerialHardware, SerialParameters,
61     VmComponents, VmImage,
62 };
63 use base::Event;
64 use devices::{IrqChip, IrqChipX86_64, PciConfigIo, PciDevice, ProtectionType};
65 use hypervisor::{HypervisorX86_64, VcpuX86_64, VmX86_64};
66 use minijail::Minijail;
67 use remain::sorted;
68 use resources::SystemAllocator;
69 use sync::Mutex;
70 use vm_control::{BatControl, BatteryType};
71 use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};
72 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
73 use {
74     gdbstub::arch::x86::reg::X86_64CoreRegs,
75     hypervisor::x86_64::{Regs, Sregs},
76 };
77 
78 #[sorted]
79 #[derive(Debug)]
80 pub enum Error {
81     AllocateIOResouce(resources::Error),
82     AllocateIrq,
83     CloneEvent(base::Error),
84     Cmdline(kernel_cmdline::Error),
85     ConfigureSystem,
86     CreateBatDevices(arch::DeviceRegistrationError),
87     CreateDevices(Box<dyn StdError>),
88     CreateEvent(base::Error),
89     CreateFdt(arch::fdt::Error),
90     CreateIoapicDevice(base::Error),
91     CreateIrqChip(Box<dyn StdError>),
92     CreatePciRoot(arch::DeviceRegistrationError),
93     CreatePit(base::Error),
94     CreatePitDevice(devices::PitError),
95     CreateSerialDevices(arch::DeviceRegistrationError),
96     CreateSocket(io::Error),
97     CreateVcpu(base::Error),
98     CreateVm(Box<dyn StdError>),
99     E820Configuration,
100     EnableSinglestep(base::Error),
101     EnableSplitIrqchip(base::Error),
102     GetSerialCmdline(GetSerialCmdlineError),
103     KernelOffsetPastEnd,
104     LoadBios(io::Error),
105     LoadBzImage(bzimage::Error),
106     LoadCmdline(kernel_loader::Error),
107     LoadInitrd(arch::LoadImageError),
108     LoadKernel(kernel_loader::Error),
109     PageNotPresent,
110     Pstore(arch::pstore::Error),
111     ReadingGuestMemory(vm_memory::GuestMemoryError),
112     ReadRegs(base::Error),
113     RegisterIrqfd(base::Error),
114     RegisterVsock(arch::DeviceRegistrationError),
115     SetHwBreakpoint(base::Error),
116     SetLint(interrupts::Error),
117     SetTssAddr(base::Error),
118     SetupCpuid(cpuid::Error),
119     SetupFpu(regs::Error),
120     SetupGuestMemory(GuestMemoryError),
121     SetupMptable(mptable::Error),
122     SetupMsrs(regs::Error),
123     SetupRegs(regs::Error),
124     SetupSmbios(smbios::Error),
125     SetupSregs(regs::Error),
126     TranslatingVirtAddr,
127     UnsupportedProtectionType,
128     WriteRegs(base::Error),
129     WritingGuestMemory(GuestMemoryError),
130     ZeroPagePastRamEnd,
131     ZeroPageSetup,
132 }
133 
134 impl Display for Error {
135     #[remain::check]
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result136     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
137         use self::Error::*;
138 
139         #[sorted]
140         match self {
141             AllocateIOResouce(e) => write!(f, "error allocating IO resource: {}", e),
142             AllocateIrq => write!(f, "error allocating a single irq"),
143             CloneEvent(e) => write!(f, "unable to clone an Event: {}", e),
144             Cmdline(e) => write!(f, "the given kernel command line was invalid: {}", e),
145             ConfigureSystem => write!(f, "error configuring the system"),
146             CreateBatDevices(e) => write!(f, "unable to create battery devices: {}", e),
147             CreateDevices(e) => write!(f, "error creating devices: {}", e),
148             CreateEvent(e) => write!(f, "unable to make an Event: {}", e),
149             CreateFdt(e) => write!(f, "failed to create fdt: {}", e),
150             CreateIoapicDevice(e) => write!(f, "failed to create IOAPIC device: {}", e),
151             CreateIrqChip(e) => write!(f, "failed to create IRQ chip: {}", e),
152             CreatePciRoot(e) => write!(f, "failed to create a PCI root hub: {}", e),
153             CreatePit(e) => write!(f, "unable to create PIT: {}", e),
154             CreatePitDevice(e) => write!(f, "unable to make PIT device: {}", e),
155             CreateSerialDevices(e) => write!(f, "unable to create serial devices: {}", e),
156             CreateSocket(e) => write!(f, "failed to create socket: {}", e),
157             CreateVcpu(e) => write!(f, "failed to create VCPU: {}", e),
158             CreateVm(e) => write!(f, "failed to create VM: {}", e),
159             E820Configuration => write!(f, "invalid e820 setup params"),
160             EnableSinglestep(e) => write!(f, "failed to enable singlestep execution: {}", e),
161             EnableSplitIrqchip(e) => write!(f, "failed to enable split irqchip: {}", e),
162             GetSerialCmdline(e) => write!(f, "failed to get serial cmdline: {}", e),
163             KernelOffsetPastEnd => write!(f, "the kernel extends past the end of RAM"),
164             LoadBios(e) => write!(f, "error loading bios: {}", e),
165             LoadBzImage(e) => write!(f, "error loading kernel bzImage: {}", e),
166             LoadCmdline(e) => write!(f, "error loading command line: {}", e),
167             LoadInitrd(e) => write!(f, "error loading initrd: {}", e),
168             LoadKernel(e) => write!(f, "error loading Kernel: {}", e),
169             PageNotPresent => write!(f, "error translating address: Page not present"),
170             Pstore(e) => write!(f, "failed to allocate pstore region: {}", e),
171             ReadingGuestMemory(e) => write!(f, "error reading guest memory {}", e),
172             ReadRegs(e) => write!(f, "error reading CPU registers {}", e),
173             RegisterIrqfd(e) => write!(f, "error registering an IrqFd: {}", e),
174             RegisterVsock(e) => write!(f, "error registering virtual socket device: {}", e),
175             SetHwBreakpoint(e) => write!(f, "failed to set a hardware breakpoint: {}", e),
176             SetLint(e) => write!(f, "failed to set interrupts: {}", e),
177             SetTssAddr(e) => write!(f, "failed to set tss addr: {}", e),
178             SetupCpuid(e) => write!(f, "failed to set up cpuid: {}", e),
179             SetupFpu(e) => write!(f, "failed to set up FPU: {}", e),
180             SetupGuestMemory(e) => write!(f, "failed to set up guest memory: {}", e),
181             SetupMptable(e) => write!(f, "failed to set up mptable: {}", e),
182             SetupMsrs(e) => write!(f, "failed to set up MSRs: {}", e),
183             SetupRegs(e) => write!(f, "failed to set up registers: {}", e),
184             SetupSmbios(e) => write!(f, "failed to set up SMBIOS: {}", e),
185             SetupSregs(e) => write!(f, "failed to set up sregs: {}", e),
186             TranslatingVirtAddr => write!(f, "failed to translate virtual address"),
187             UnsupportedProtectionType => write!(f, "protected VMs not supported on x86_64"),
188             WriteRegs(e) => write!(f, "error writing CPU registers {}", e),
189             WritingGuestMemory(e) => write!(f, "error writing guest memory {}", e),
190             ZeroPagePastRamEnd => write!(f, "the zero page extends past the end of guest_mem"),
191             ZeroPageSetup => write!(f, "error writing the zero page of guest memory"),
192         }
193     }
194 }
195 
196 pub type Result<T> = std::result::Result<T, Error>;
197 
198 impl std::error::Error for Error {}
199 
200 pub struct X8664arch;
201 
// Initial stack pointer handed to `regs::setup_regs` when booting a kernel directly.
const BOOT_STACK_POINTER: u64 = 0x8000;
// Size of the hole below 4 GiB that is not backed by guest RAM.
// Keep it a multiple of 256 MiB for MTRR convenience.
const MEM_32BIT_GAP_SIZE: u64 = 768 << 20;
const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32;
// First address of the hole; RAM below 4 GiB ends here.
const END_ADDR_BEFORE_32BITS: u64 = FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE;
// The hole minus its top 128 MiB (0x8000000).
const MMIO_SIZE: u64 = MEM_32BIT_GAP_SIZE - 0x8000000;
// Offset of the 64-bit entry point from the kernel load address.
const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
// Guest physical address at which the boot parameters ("zero page") are written.
const ZERO_PAGE_OFFSET: u64 = 0x7000;
const TSS_ADDR: u64 = 0xfffbd000;

const KERNEL_START_OFFSET: u64 = 0x200000;
const CMDLINE_OFFSET: u64 = 0x20000;
// The command line may use everything between its own offset and the kernel load address.
const CMDLINE_MAX_SIZE: u64 = KERNEL_START_OFFSET - CMDLINE_OFFSET;
const X86_64_SERIAL_1_3_IRQ: u32 = 4;
const X86_64_SERIAL_2_4_IRQ: u32 = 3;
// X86_64_SCI_IRQ is used to fill the ACPI FACP table.
// The SCI IRQ number should be a legacy IRQ number below 16 (most platforms
// actually use the fixed IRQ number 9), so IRQ 5 is reserved for SCI and
// the other devices start from the next available number.
pub const X86_64_SCI_IRQ: u32 = 5;
// The CMOS RTC uses IRQ 8; start allocating IRQs at 9.
pub const X86_64_IRQ_BASE: u32 = 9;
const ACPI_HI_RSDP_WINDOW_BASE: u64 = 0x000E0000;
227 
228 /// The x86 reset vector for i386+ and x86_64 puts the processor into an "unreal mode" where it
229 /// can access the last 1 MB of the 32-bit address space in 16-bit mode, and starts the instruction
230 /// pointer at the effective physical address 0xFFFFFFF0.
bios_start(bios_size: u64) -> GuestAddress231 fn bios_start(bios_size: u64) -> GuestAddress {
232     GuestAddress(FIRST_ADDR_PAST_32BITS - bios_size)
233 }
234 
configure_system( guest_mem: &GuestMemory, _mem_size: u64, kernel_addr: GuestAddress, cmdline_addr: GuestAddress, cmdline_size: usize, setup_data: Option<GuestAddress>, initrd: Option<(GuestAddress, usize)>, mut params: boot_params, ) -> Result<()>235 fn configure_system(
236     guest_mem: &GuestMemory,
237     _mem_size: u64,
238     kernel_addr: GuestAddress,
239     cmdline_addr: GuestAddress,
240     cmdline_size: usize,
241     setup_data: Option<GuestAddress>,
242     initrd: Option<(GuestAddress, usize)>,
243     mut params: boot_params,
244 ) -> Result<()> {
245     const EBDA_START: u64 = 0x0009fc00;
246     const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
247     const KERNEL_HDR_MAGIC: u32 = 0x53726448;
248     const KERNEL_LOADER_OTHER: u8 = 0xff;
249     const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x1000000; // Must be non-zero.
250     let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS);
251     let end_32bit_gap_start = GuestAddress(END_ADDR_BEFORE_32BITS);
252 
253     params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
254     params.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
255     params.hdr.header = KERNEL_HDR_MAGIC;
256     params.hdr.cmd_line_ptr = cmdline_addr.offset() as u32;
257     params.hdr.cmdline_size = cmdline_size as u32;
258     params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
259     if let Some(setup_data) = setup_data {
260         params.hdr.setup_data = setup_data.offset();
261     }
262     if let Some((initrd_addr, initrd_size)) = initrd {
263         params.hdr.ramdisk_image = initrd_addr.offset() as u32;
264         params.hdr.ramdisk_size = initrd_size as u32;
265     }
266 
267     add_e820_entry(&mut params, 0, EBDA_START, E820_RAM)?;
268 
269     let mem_end = guest_mem.end_addr();
270     if mem_end < end_32bit_gap_start {
271         add_e820_entry(
272             &mut params,
273             kernel_addr.offset() as u64,
274             mem_end.offset_from(kernel_addr) as u64,
275             E820_RAM,
276         )?;
277     } else {
278         add_e820_entry(
279             &mut params,
280             kernel_addr.offset() as u64,
281             end_32bit_gap_start.offset_from(kernel_addr) as u64,
282             E820_RAM,
283         )?;
284         if mem_end > first_addr_past_32bits {
285             add_e820_entry(
286                 &mut params,
287                 first_addr_past_32bits.offset() as u64,
288                 mem_end.offset_from(first_addr_past_32bits) as u64,
289                 E820_RAM,
290             )?;
291         }
292     }
293 
294     let zero_page_addr = GuestAddress(ZERO_PAGE_OFFSET);
295     guest_mem
296         .checked_offset(zero_page_addr, mem::size_of::<boot_params>() as u64)
297         .ok_or(Error::ZeroPagePastRamEnd)?;
298     guest_mem
299         .write_obj_at_addr(params, zero_page_addr)
300         .map_err(|_| Error::ZeroPageSetup)?;
301 
302     Ok(())
303 }
304 
305 /// Add an e820 region to the e820 map.
306 /// Returns Ok(()) if successful, or an error if there is no space left in the map.
add_e820_entry(params: &mut boot_params, addr: u64, size: u64, mem_type: u32) -> Result<()>307 fn add_e820_entry(params: &mut boot_params, addr: u64, size: u64, mem_type: u32) -> Result<()> {
308     if params.e820_entries >= params.e820_table.len() as u8 {
309         return Err(Error::E820Configuration);
310     }
311 
312     params.e820_table[params.e820_entries as usize].addr = addr;
313     params.e820_table[params.e820_entries as usize].size = size;
314     params.e820_table[params.e820_entries as usize].type_ = mem_type;
315     params.e820_entries += 1;
316 
317     Ok(())
318 }
319 
320 /// Returns a Vec of the valid memory addresses.
321 /// These should be used to configure the GuestMemory structure for the platform.
322 /// For x86_64 all addresses are valid from the start of the kernel except a
323 /// carve out at the end of 32bit address space.
arch_memory_regions(size: u64, bios_size: Option<u64>) -> Vec<(GuestAddress, u64)>324 fn arch_memory_regions(size: u64, bios_size: Option<u64>) -> Vec<(GuestAddress, u64)> {
325     let mem_end = GuestAddress(size);
326     let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS);
327     let end_32bit_gap_start = GuestAddress(END_ADDR_BEFORE_32BITS);
328 
329     let mut regions = Vec::new();
330     if mem_end <= end_32bit_gap_start {
331         regions.push((GuestAddress(0), size));
332         if let Some(bios_size) = bios_size {
333             regions.push((bios_start(bios_size), bios_size));
334         }
335     } else {
336         regions.push((GuestAddress(0), end_32bit_gap_start.offset()));
337         if let Some(bios_size) = bios_size {
338             regions.push((bios_start(bios_size), bios_size));
339         }
340         regions.push((
341             first_addr_past_32bits,
342             mem_end.offset_from(end_32bit_gap_start),
343         ));
344     }
345 
346     regions
347 }
348 
349 impl arch::LinuxArch for X8664arch {
350     type Error = Error;
351 
guest_memory_layout( components: &VmComponents, ) -> std::result::Result<Vec<(GuestAddress, u64)>, Self::Error>352     fn guest_memory_layout(
353         components: &VmComponents,
354     ) -> std::result::Result<Vec<(GuestAddress, u64)>, Self::Error> {
355         let bios_size = match &components.vm_image {
356             VmImage::Bios(bios_file) => Some(bios_file.metadata().map_err(Error::LoadBios)?.len()),
357             VmImage::Kernel(_) => None,
358         };
359         Ok(arch_memory_regions(components.memory_size, bios_size))
360     }
361 
build_vm<V, Vcpu, I, FD, FI, E1, E2>( mut components: VmComponents, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, battery: (&Option<BatteryType>, Option<Minijail>), mut vm: V, create_devices: FD, create_irq_chip: FI, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu, I>, Self::Error> where V: VmX86_64, Vcpu: VcpuX86_64, I: IrqChipX86_64, FD: FnOnce( &GuestMemory, &mut V, &mut SystemAllocator, &Event, ) -> std::result::Result<Vec<(Box<dyn PciDevice>, Option<Minijail>)>, E1>, FI: FnOnce(&V, usize) -> std::result::Result<I, E2>, E1: StdError + 'static, E2: StdError + 'static,362     fn build_vm<V, Vcpu, I, FD, FI, E1, E2>(
363         mut components: VmComponents,
364         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
365         serial_jail: Option<Minijail>,
366         battery: (&Option<BatteryType>, Option<Minijail>),
367         mut vm: V,
368         create_devices: FD,
369         create_irq_chip: FI,
370     ) -> std::result::Result<RunnableLinuxVm<V, Vcpu, I>, Self::Error>
371     where
372         V: VmX86_64,
373         Vcpu: VcpuX86_64,
374         I: IrqChipX86_64,
375         FD: FnOnce(
376             &GuestMemory,
377             &mut V,
378             &mut SystemAllocator,
379             &Event,
380         ) -> std::result::Result<Vec<(Box<dyn PciDevice>, Option<Minijail>)>, E1>,
381         FI: FnOnce(&V, /* vcpu_count: */ usize) -> std::result::Result<I, E2>,
382         E1: StdError + 'static,
383         E2: StdError + 'static,
384     {
385         if components.protected_vm != ProtectionType::Unprotected {
386             return Err(Error::UnsupportedProtectionType);
387         }
388 
389         let mem = vm.get_memory().clone();
390         let mut resources = Self::get_resource_allocator(&mem);
391 
392         let vcpu_count = components.vcpu_count;
393         let mut irq_chip =
394             create_irq_chip(&vm, vcpu_count).map_err(|e| Error::CreateIrqChip(Box::new(e)))?;
395 
396         let tss_addr = GuestAddress(TSS_ADDR);
397         vm.set_tss_addr(tss_addr).map_err(Error::SetTssAddr)?;
398 
399         let mut mmio_bus = devices::Bus::new();
400 
401         let exit_evt = Event::new().map_err(Error::CreateEvent)?;
402 
403         let pci_devices = create_devices(&mem, &mut vm, &mut resources, &exit_evt)
404             .map_err(|e| Error::CreateDevices(Box::new(e)))?;
405         let (pci, pci_irqs, pid_debug_label_map) = arch::generate_pci_root(
406             pci_devices,
407             &mut irq_chip,
408             &mut mmio_bus,
409             &mut resources,
410             &mut vm,
411             4, // Share the four pin interrupts (INTx#)
412         )
413         .map_err(Error::CreatePciRoot)?;
414         let pci_bus = Arc::new(Mutex::new(PciConfigIo::new(pci)));
415 
416         // Event used to notify crosvm that guest OS is trying to suspend.
417         let suspend_evt = Event::new().map_err(Error::CreateEvent)?;
418 
419         let mut io_bus = Self::setup_io_bus(
420             irq_chip.pit_uses_speaker_port(),
421             exit_evt.try_clone().map_err(Error::CloneEvent)?,
422             Some(pci_bus),
423             components.memory_size,
424         )?;
425 
426         Self::setup_serial_devices(
427             components.protected_vm,
428             &mut irq_chip,
429             &mut io_bus,
430             serial_parameters,
431             serial_jail,
432         )?;
433 
434         let (acpi_dev_resource, bat_control) = Self::setup_acpi_devices(
435             &mut io_bus,
436             &mut resources,
437             suspend_evt.try_clone().map_err(Error::CloneEvent)?,
438             exit_evt.try_clone().map_err(Error::CloneEvent)?,
439             components.acpi_sdts,
440             &mut irq_chip,
441             battery,
442             &mut mmio_bus,
443         )?;
444 
445         let ramoops_region = match components.pstore {
446             Some(pstore) => Some(
447                 arch::pstore::create_memory_region(&mut vm, &mut resources, &pstore)
448                     .map_err(Error::Pstore)?,
449             ),
450             None => None,
451         };
452 
453         irq_chip
454             .finalize_devices(&mut resources, &mut io_bus, &mut mmio_bus)
455             .map_err(Error::RegisterIrqfd)?;
456 
457         // All of these bios generated tables are set manually for the benefit of the kernel boot
458         // flow (since there's no BIOS to set it) and for the BIOS boot flow since crosvm doesn't
459         // have a way to pass the BIOS these configs.
460         // This works right now because the only guest BIOS used with crosvm (u-boot) ignores these
461         // tables and the guest OS picks them up.
462         // If another guest does need a way to pass these tables down to it's BIOS, this approach
463         // should be rethought.
464 
465         // Note that this puts the mptable at 0x9FC00 in guest physical memory.
466         mptable::setup_mptable(&mem, vcpu_count as u8, pci_irqs).map_err(Error::SetupMptable)?;
467         smbios::setup_smbios(&mem, components.dmi_path).map_err(Error::SetupSmbios)?;
468 
469         // TODO (tjeznach) Write RSDP to bootconfig before writing to memory
470         acpi::create_acpi_tables(&mem, vcpu_count as u8, X86_64_SCI_IRQ, acpi_dev_resource);
471 
472         match components.vm_image {
473             VmImage::Bios(ref mut bios) => Self::load_bios(&mem, bios)?,
474             VmImage::Kernel(ref mut kernel_image) => {
475                 let mut cmdline = Self::get_base_linux_cmdline();
476 
477                 get_serial_cmdline(&mut cmdline, serial_parameters, "io")
478                     .map_err(Error::GetSerialCmdline)?;
479 
480                 for param in components.extra_kernel_params {
481                     cmdline.insert_str(&param).map_err(Error::Cmdline)?;
482                 }
483                 // It seems that default record_size is only 4096 byte even if crosvm allocates
484                 // more memory. It means that one crash can only 4096 byte.
485                 // Set record_size and console_size to 1/4 of allocated memory size.
486                 // This configulation is same as the host.
487                 if let Some(ramoops_region) = ramoops_region {
488                     let ramoops_opts = [
489                         ("mem_address", ramoops_region.address),
490                         ("mem_size", ramoops_region.size as u64),
491                         ("console_size", (ramoops_region.size / 4) as u64),
492                         ("record_size", (ramoops_region.size / 4) as u64),
493                         ("dump_oops", 1_u64),
494                     ];
495                     for (name, val) in &ramoops_opts {
496                         cmdline
497                             .insert_str(format!("ramoops.{}={:#x}", name, val))
498                             .map_err(Error::Cmdline)?;
499                     }
500                 }
501 
502                 // separate out load_kernel from other setup to get a specific error for
503                 // kernel loading
504                 let (params, kernel_end) = Self::load_kernel(&mem, kernel_image)?;
505 
506                 Self::setup_system_memory(
507                     &mem,
508                     components.memory_size,
509                     &CString::new(cmdline).unwrap(),
510                     components.initrd_image,
511                     components.android_fstab,
512                     kernel_end,
513                     params,
514                 )?;
515             }
516         }
517 
518         Ok(RunnableLinuxVm {
519             vm,
520             resources,
521             exit_evt,
522             vcpu_count,
523             vcpus: None,
524             vcpu_affinity: components.vcpu_affinity,
525             no_smt: components.no_smt,
526             irq_chip,
527             has_bios: matches!(components.vm_image, VmImage::Bios(_)),
528             io_bus,
529             mmio_bus,
530             pid_debug_label_map,
531             suspend_evt,
532             rt_cpus: components.rt_cpus,
533             bat_control,
534             #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
535             gdb: components.gdb,
536         })
537     }
538 
configure_vcpu( guest_mem: &GuestMemory, hypervisor: &dyn HypervisorX86_64, irq_chip: &mut dyn IrqChipX86_64, vcpu: &mut dyn VcpuX86_64, vcpu_id: usize, num_cpus: usize, has_bios: bool, no_smt: bool, ) -> Result<()>539     fn configure_vcpu(
540         guest_mem: &GuestMemory,
541         hypervisor: &dyn HypervisorX86_64,
542         irq_chip: &mut dyn IrqChipX86_64,
543         vcpu: &mut dyn VcpuX86_64,
544         vcpu_id: usize,
545         num_cpus: usize,
546         has_bios: bool,
547         no_smt: bool,
548     ) -> Result<()> {
549         cpuid::setup_cpuid(hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, no_smt)
550             .map_err(Error::SetupCpuid)?;
551 
552         if has_bios {
553             return Ok(());
554         }
555 
556         let kernel_load_addr = GuestAddress(KERNEL_START_OFFSET);
557         regs::setup_msrs(vcpu, END_ADDR_BEFORE_32BITS).map_err(Error::SetupMsrs)?;
558         let kernel_end = guest_mem
559             .checked_offset(kernel_load_addr, KERNEL_64BIT_ENTRY_OFFSET)
560             .ok_or(Error::KernelOffsetPastEnd)?;
561         regs::setup_regs(
562             vcpu,
563             (kernel_end).offset() as u64,
564             BOOT_STACK_POINTER as u64,
565             ZERO_PAGE_OFFSET as u64,
566         )
567         .map_err(Error::SetupRegs)?;
568         regs::setup_fpu(vcpu).map_err(Error::SetupFpu)?;
569         regs::setup_sregs(guest_mem, vcpu).map_err(Error::SetupSregs)?;
570         interrupts::set_lint(vcpu_id, irq_chip).map_err(Error::SetLint)?;
571 
572         Ok(())
573     }
574 
575     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
debug_read_registers<T: VcpuX86_64>(vcpu: &T) -> Result<X86_64CoreRegs>576     fn debug_read_registers<T: VcpuX86_64>(vcpu: &T) -> Result<X86_64CoreRegs> {
577         // General registers: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15
578         let gregs = vcpu.get_regs().map_err(Error::ReadRegs)?;
579         let regs = [
580             gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rbp, gregs.rsp,
581             gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14, gregs.r15,
582         ];
583 
584         // GDB exposes 32-bit eflags instead of 64-bit rflags.
585         // https://github.com/bminor/binutils-gdb/blob/master/gdb/features/i386/64bit-core.xml
586         let eflags = gregs.rflags as u32;
587         let rip = gregs.rip;
588 
589         // Segment registers: CS, SS, DS, ES, FS, GS
590         let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
591         let sgs = [sregs.cs, sregs.ss, sregs.ds, sregs.es, sregs.fs, sregs.gs];
592         let mut segments = [0u32; 6];
593         // GDB uses only the selectors.
594         for i in 0..sgs.len() {
595             segments[i] = sgs[i].selector as u32;
596         }
597 
598         // TODO(keiichiw): Other registers such as FPU, xmm and mxcsr.
599 
600         Ok(X86_64CoreRegs {
601             regs,
602             eflags,
603             rip,
604             segments,
605             ..Default::default()
606         })
607     }
608 
609     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
debug_write_registers<T: VcpuX86_64>(vcpu: &T, regs: &X86_64CoreRegs) -> Result<()>610     fn debug_write_registers<T: VcpuX86_64>(vcpu: &T, regs: &X86_64CoreRegs) -> Result<()> {
611         // General purpose registers (RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15) + RIP + rflags
612         let orig_gregs = vcpu.get_regs().map_err(Error::ReadRegs)?;
613         let gregs = Regs {
614             rax: regs.regs[0],
615             rbx: regs.regs[1],
616             rcx: regs.regs[2],
617             rdx: regs.regs[3],
618             rsi: regs.regs[4],
619             rdi: regs.regs[5],
620             rbp: regs.regs[6],
621             rsp: regs.regs[7],
622             r8: regs.regs[8],
623             r9: regs.regs[9],
624             r10: regs.regs[10],
625             r11: regs.regs[11],
626             r12: regs.regs[12],
627             r13: regs.regs[13],
628             r14: regs.regs[14],
629             r15: regs.regs[15],
630             rip: regs.rip,
631             // Update the lower 32 bits of rflags.
632             rflags: (orig_gregs.rflags & !(u32::MAX as u64)) | (regs.eflags as u64),
633         };
634         vcpu.set_regs(&gregs).map_err(Error::WriteRegs)?;
635 
636         // Segment registers: CS, SS, DS, ES, FS, GS
637         // Since GDB care only selectors, we call get_sregs() first.
638         let mut sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
639         sregs.cs.selector = regs.segments[0] as u16;
640         sregs.ss.selector = regs.segments[1] as u16;
641         sregs.ds.selector = regs.segments[2] as u16;
642         sregs.es.selector = regs.segments[3] as u16;
643         sregs.fs.selector = regs.segments[4] as u16;
644         sregs.gs.selector = regs.segments[5] as u16;
645 
646         vcpu.set_sregs(&sregs).map_err(Error::WriteRegs)?;
647 
648         // TODO(keiichiw): Other registers such as FPU, xmm and mxcsr.
649 
650         Ok(())
651     }
652 
653     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
debug_read_memory<T: VcpuX86_64>( vcpu: &T, guest_mem: &GuestMemory, vaddr: GuestAddress, len: usize, ) -> Result<Vec<u8>>654     fn debug_read_memory<T: VcpuX86_64>(
655         vcpu: &T,
656         guest_mem: &GuestMemory,
657         vaddr: GuestAddress,
658         len: usize,
659     ) -> Result<Vec<u8>> {
660         let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
661         let mut buf = vec![0; len];
662         let mut total_read = 0u64;
663         // Handle reads across page boundaries.
664 
665         while total_read < len as u64 {
666             let (paddr, psize) = phys_addr(guest_mem, vaddr.0 + total_read, &sregs)?;
667             let read_len = std::cmp::min(len as u64 - total_read, psize - (paddr & (psize - 1)));
668             guest_mem
669                 .get_slice_at_addr(GuestAddress(paddr), read_len as usize)
670                 .map_err(Error::ReadingGuestMemory)?
671                 .copy_to(&mut buf[total_read as usize..]);
672             total_read += read_len;
673         }
674         Ok(buf)
675     }
676 
677     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
debug_write_memory<T: VcpuX86_64>( vcpu: &T, guest_mem: &GuestMemory, vaddr: GuestAddress, buf: &[u8], ) -> Result<()>678     fn debug_write_memory<T: VcpuX86_64>(
679         vcpu: &T,
680         guest_mem: &GuestMemory,
681         vaddr: GuestAddress,
682         buf: &[u8],
683     ) -> Result<()> {
684         let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
685         let mut total_written = 0u64;
686         // Handle writes across page boundaries.
687         while total_written < buf.len() as u64 {
688             let (paddr, psize) = phys_addr(guest_mem, vaddr.0 + total_written, &sregs)?;
689             let write_len = std::cmp::min(
690                 buf.len() as u64 - total_written,
691                 psize - (paddr & (psize - 1)),
692             );
693 
694             guest_mem
695                 .write_all_at_addr(
696                     &buf[total_written as usize..(total_written as usize + write_len as usize)],
697                     GuestAddress(paddr),
698                 )
699                 .map_err(Error::WritingGuestMemory)?;
700             total_written += write_len;
701         }
702         Ok(())
703     }
704 
705     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
debug_enable_singlestep<T: VcpuX86_64>(vcpu: &T) -> Result<()>706     fn debug_enable_singlestep<T: VcpuX86_64>(vcpu: &T) -> Result<()> {
707         vcpu.set_guest_debug(&[], true /* enable_singlestep */)
708             .map_err(Error::EnableSinglestep)
709     }
710 
711     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
debug_set_hw_breakpoints<T: VcpuX86_64>( vcpu: &T, breakpoints: &[GuestAddress], ) -> Result<()>712     fn debug_set_hw_breakpoints<T: VcpuX86_64>(
713         vcpu: &T,
714         breakpoints: &[GuestAddress],
715     ) -> Result<()> {
716         vcpu.set_guest_debug(&breakpoints, false /* enable_singlestep */)
717             .map_err(Error::SetHwBreakpoint)
718     }
719 }
720 
/// Translates the guest virtual address `vaddr` into a guest physical address by walking the
/// guest's page tables.
///
/// Returns the translated address and the size of the page it resides in. When paging is
/// disabled (`CR0.PG` clear) the address is returned unchanged with a 4 KiB page size.
///
/// # Arguments
///
/// * `mem` - The guest memory that holds the page tables.
/// * `vaddr` - The guest virtual address to translate.
/// * `sregs` - The special registers describing the paging mode and the root table (`CR3`).
///
/// # Errors
///
/// * `Error::TranslatingVirtAddr` - paging is enabled in a mode other than long mode (PAE
///   disabled or `EFER.LMA` clear), or a page table entry could not be read from `mem`.
/// * `Error::PageNotPresent` - an entry on the walk does not have the present bit set.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn phys_addr(mem: &GuestMemory, vaddr: u64, sregs: &Sregs) -> Result<(u64, u64)> {
    const CR0_PG_MASK: u64 = 1 << 31;
    const CR4_PAE_MASK: u64 = 1 << 5;
    const CR4_LA57_MASK: u64 = 1 << 12;
    const MSR_EFER_LMA: u64 = 1 << 10;
    // bits 12 through 51 are the address in a PTE.
    const PTE_ADDR_MASK: u64 = ((1 << 52) - 1) & !0x0fff;
    const PAGE_PRESENT: u64 = 0x1;
    const PAGE_PSE_MASK: u64 = 0x1 << 7;

    const PAGE_SIZE_4K: u64 = 4 * 1024;
    const PAGE_SIZE_2M: u64 = 2 * 1024 * 1024;
    const PAGE_SIZE_1G: u64 = 1024 * 1024 * 1024;

    // Read the entry for `vaddr` from the table of the given `level` located at
    // `curr_table_addr` (either CR3 or the entry of the previous level).
    fn next_pte(mem: &GuestMemory, curr_table_addr: u64, vaddr: u64, level: usize) -> Result<u64> {
        let ent: u64 = mem
            .read_obj_from_addr(GuestAddress(
                (curr_table_addr & PTE_ADDR_MASK) + page_table_offset(vaddr, level),
            ))
            .map_err(|_| Error::TranslatingVirtAddr)?;
        // TODO - emit the walk (level, table address, entry, offset) as a trace.
        if ent & PAGE_PRESENT == 0 {
            return Err(Error::PageNotPresent);
        }
        Ok(ent)
    }

    // Get the offset in to the page of `vaddr`.
    fn page_offset(vaddr: u64, page_size: u64) -> u64 {
        vaddr & (page_size - 1)
    }

    // Get the offset in to the page table of the given `level` specified by the virtual `address`.
    // `level` is 1 through 5 in x86_64 to handle the five levels of paging.
    fn page_table_offset(addr: u64, level: usize) -> u64 {
        let offset = (level - 1) * 9 + 12;
        ((addr >> offset) & 0x1ff) << 3
    }

    // Get the physical base address of the large page of `page_size` bytes mapped by `ent`.
    // In a large-page entry the bits below the page size are not address bits (bit 12 is the
    // PAT bit), so they are masked off to keep them out of the translated address.
    fn large_page_base(ent: u64, page_size: u64) -> u64 {
        ent & PTE_ADDR_MASK & !(page_size - 1)
    }

    // Paging disabled: virtual addresses are physical addresses.
    if sregs.cr0 & CR0_PG_MASK == 0 {
        return Ok((vaddr, PAGE_SIZE_4K));
    }

    // Legacy 32-bit paging (no PAE) is not supported.
    if sregs.cr4 & CR4_PAE_MASK == 0 {
        return Err(Error::TranslatingVirtAddr);
    }

    // PAE paging outside of long mode is not supported.
    if sregs.efer & MSR_EFER_LMA == 0 {
        return Err(Error::TranslatingVirtAddr);
    }

    // With LA57 enabled CR3 points at the level 5 table, whose entry locates the level 4
    // table; otherwise CR3 points at the level 4 table directly.
    let p4_table = if sregs.cr4 & CR4_LA57_MASK != 0 {
        next_pte(mem, sregs.cr3, vaddr, 5)?
    } else {
        sregs.cr3
    };
    let p4_ent = next_pte(mem, p4_table, vaddr, 4)?;
    let p3_ent = next_pte(mem, p4_ent, vaddr, 3)?;
    if p3_ent & PAGE_PSE_MASK != 0 {
        // It's a 1G page with the PSE bit in p3_ent
        let paddr = large_page_base(p3_ent, PAGE_SIZE_1G) | page_offset(vaddr, PAGE_SIZE_1G);
        return Ok((paddr, PAGE_SIZE_1G));
    }
    let p2_ent = next_pte(mem, p3_ent, vaddr, 2)?;
    if p2_ent & PAGE_PSE_MASK != 0 {
        // It's a 2M page with the PSE bit in p2_ent
        let paddr = large_page_base(p2_ent, PAGE_SIZE_2M) | page_offset(vaddr, PAGE_SIZE_2M);
        return Ok((paddr, PAGE_SIZE_2M));
    }
    let p1_ent = next_pte(mem, p2_ent, vaddr, 1)?;
    let paddr = p1_ent & PTE_ADDR_MASK | page_offset(vaddr, PAGE_SIZE_4K);
    Ok((paddr, PAGE_SIZE_4K))
}
803 
804 impl X8664arch {
805     /// Loads the bios from an open file.
806     ///
807     /// # Arguments
808     ///
809     /// * `mem` - The memory to be used by the guest.
810     /// * `bios_image` - the File object for the specified bios
load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()>811     fn load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()> {
812         let bios_image_length = bios_image
813             .seek(io::SeekFrom::End(0))
814             .map_err(Error::LoadBios)?;
815         if bios_image_length >= FIRST_ADDR_PAST_32BITS {
816             return Err(Error::LoadBios(io::Error::new(
817                 io::ErrorKind::InvalidData,
818                 format!(
819                     "bios was {} bytes, expected less than {}",
820                     bios_image_length, FIRST_ADDR_PAST_32BITS,
821                 ),
822             )));
823         }
824         bios_image
825             .seek(io::SeekFrom::Start(0))
826             .map_err(Error::LoadBios)?;
827         mem.read_to_memory(
828             bios_start(bios_image_length),
829             bios_image,
830             bios_image_length as usize,
831         )
832         .map_err(Error::SetupGuestMemory)?;
833         Ok(())
834     }
835 
836     /// Loads the kernel from an open file.
837     ///
838     /// # Arguments
839     ///
840     /// * `mem` - The memory to be used by the guest.
841     /// * `kernel_image` - the File object for the specified kernel.
load_kernel(mem: &GuestMemory, kernel_image: &mut File) -> Result<(boot_params, u64)>842     fn load_kernel(mem: &GuestMemory, kernel_image: &mut File) -> Result<(boot_params, u64)> {
843         let elf_result =
844             kernel_loader::load_kernel(mem, GuestAddress(KERNEL_START_OFFSET), kernel_image);
845         if elf_result == Err(kernel_loader::Error::InvalidElfMagicNumber) {
846             bzimage::load_bzimage(mem, GuestAddress(KERNEL_START_OFFSET), kernel_image)
847                 .map_err(Error::LoadBzImage)
848         } else {
849             let kernel_end = elf_result.map_err(Error::LoadKernel)?;
850             Ok((Default::default(), kernel_end))
851         }
852     }
853 
854     /// Configures the system memory space should be called once per vm before
855     /// starting vcpu threads.
856     ///
857     /// # Arguments
858     ///
859     /// * `mem` - The memory to be used by the guest.
860     /// * `cmdline` - the kernel commandline
861     /// * `initrd_file` - an initial ramdisk image
setup_system_memory( mem: &GuestMemory, mem_size: u64, cmdline: &CStr, initrd_file: Option<File>, android_fstab: Option<File>, kernel_end: u64, params: boot_params, ) -> Result<()>862     fn setup_system_memory(
863         mem: &GuestMemory,
864         mem_size: u64,
865         cmdline: &CStr,
866         initrd_file: Option<File>,
867         android_fstab: Option<File>,
868         kernel_end: u64,
869         params: boot_params,
870     ) -> Result<()> {
871         kernel_loader::load_cmdline(mem, GuestAddress(CMDLINE_OFFSET), cmdline)
872             .map_err(Error::LoadCmdline)?;
873 
874         // Track the first free address after the kernel - this is where extra
875         // data like the device tree blob and initrd will be loaded.
876         let mut free_addr = kernel_end;
877 
878         let setup_data = if let Some(android_fstab) = android_fstab {
879             let free_addr_aligned = (((free_addr + 64 - 1) / 64) * 64) + 64;
880             let dtb_start = GuestAddress(free_addr_aligned);
881             let dtb_size = fdt::create_fdt(
882                 X86_64_FDT_MAX_SIZE as usize,
883                 mem,
884                 dtb_start.offset(),
885                 android_fstab,
886             )
887             .map_err(Error::CreateFdt)?;
888             free_addr = dtb_start.offset() + dtb_size as u64;
889             Some(dtb_start)
890         } else {
891             None
892         };
893 
894         let initrd = match initrd_file {
895             Some(mut initrd_file) => {
896                 let mut initrd_addr_max = u64::from(params.hdr.initrd_addr_max);
897                 // Default initrd_addr_max for old kernels (see Documentation/x86/boot.txt).
898                 if initrd_addr_max == 0 {
899                     initrd_addr_max = 0x37FFFFFF;
900                 }
901 
902                 let mem_max = mem.end_addr().offset() - 1;
903                 if initrd_addr_max > mem_max {
904                     initrd_addr_max = mem_max;
905                 }
906 
907                 let (initrd_start, initrd_size) = arch::load_image_high(
908                     mem,
909                     &mut initrd_file,
910                     GuestAddress(free_addr),
911                     GuestAddress(initrd_addr_max),
912                     base::pagesize() as u64,
913                 )
914                 .map_err(Error::LoadInitrd)?;
915                 Some((initrd_start, initrd_size))
916             }
917             None => None,
918         };
919 
920         configure_system(
921             mem,
922             mem_size,
923             GuestAddress(KERNEL_START_OFFSET),
924             GuestAddress(CMDLINE_OFFSET),
925             cmdline.to_bytes().len() + 1,
926             setup_data,
927             initrd,
928             params,
929         )?;
930         Ok(())
931     }
932 
933     /// This returns the start address of high mmio
934     ///
935     /// # Arguments
936     ///
937     /// * mem: The memory to be used by the guest
get_high_mmio_base(mem: &GuestMemory) -> u64938     fn get_high_mmio_base(mem: &GuestMemory) -> u64 {
939         // Put device memory at a 2MB boundary after physical memory or 4gb, whichever is greater.
940         const MB: u64 = 1 << 20;
941         const GB: u64 = 1 << 30;
942         let ram_end_round_2mb = (mem.end_addr().offset() + 2 * MB - 1) / (2 * MB) * (2 * MB);
943         std::cmp::max(ram_end_round_2mb, 4 * GB)
944     }
945 
946     /// This returns a minimal kernel command for this architecture
get_base_linux_cmdline() -> kernel_cmdline::Cmdline947     fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
948         let mut cmdline = kernel_cmdline::Cmdline::new(CMDLINE_MAX_SIZE as usize);
949         cmdline.insert_str("pci=noacpi reboot=k panic=-1").unwrap();
950 
951         cmdline
952     }
953 
954     /// Returns a system resource allocator.
get_resource_allocator(mem: &GuestMemory) -> SystemAllocator955     fn get_resource_allocator(mem: &GuestMemory) -> SystemAllocator {
956         let high_mmio_start = Self::get_high_mmio_base(mem);
957         SystemAllocator::builder()
958             .add_io_addresses(0xc000, 0x10000)
959             .add_low_mmio_addresses(END_ADDR_BEFORE_32BITS, MMIO_SIZE)
960             .add_high_mmio_addresses(high_mmio_start, u64::max_value() - high_mmio_start)
961             .create_allocator(X86_64_IRQ_BASE)
962             .unwrap()
963     }
964 
965     /// Sets up the IO bus for this platform
966     ///
967     /// # Arguments
968     ///
969     /// * - `pit_uses_speaker_port` - does the PIT use port 0x61 for the PC speaker
970     /// * - `exit_evt` - the event object which should receive exit events
971     /// * - `mem_size` - the size in bytes of physical ram for the guest
setup_io_bus( pit_uses_speaker_port: bool, exit_evt: Event, pci: Option<Arc<Mutex<devices::PciConfigIo>>>, mem_size: u64, ) -> Result<devices::Bus>972     fn setup_io_bus(
973         pit_uses_speaker_port: bool,
974         exit_evt: Event,
975         pci: Option<Arc<Mutex<devices::PciConfigIo>>>,
976         mem_size: u64,
977     ) -> Result<devices::Bus> {
978         struct NoDevice;
979         impl devices::BusDevice for NoDevice {
980             fn debug_label(&self) -> String {
981                 "no device".to_owned()
982             }
983         }
984 
985         let mut io_bus = devices::Bus::new();
986 
987         let mem_regions = arch_memory_regions(mem_size, None);
988 
989         let mem_below_4g = mem_regions
990             .iter()
991             .filter(|r| r.0.offset() < FIRST_ADDR_PAST_32BITS)
992             .map(|r| r.1)
993             .sum();
994 
995         let mem_above_4g = mem_regions
996             .iter()
997             .filter(|r| r.0.offset() >= FIRST_ADDR_PAST_32BITS)
998             .map(|r| r.1)
999             .sum();
1000 
1001         io_bus
1002             .insert(
1003                 Arc::new(Mutex::new(devices::Cmos::new(mem_below_4g, mem_above_4g))),
1004                 0x70,
1005                 0x2,
1006             )
1007             .unwrap();
1008 
1009         let nul_device = Arc::new(Mutex::new(NoDevice));
1010         let i8042 = Arc::new(Mutex::new(devices::I8042Device::new(
1011             exit_evt.try_clone().map_err(Error::CloneEvent)?,
1012         )));
1013 
1014         if pit_uses_speaker_port {
1015             io_bus.insert(i8042, 0x062, 0x3).unwrap();
1016         } else {
1017             io_bus.insert(i8042, 0x061, 0x4).unwrap();
1018         }
1019 
1020         io_bus.insert(nul_device.clone(), 0x0ed, 0x1).unwrap(); // most likely this one does nothing
1021         io_bus.insert(nul_device.clone(), 0x0f0, 0x2).unwrap(); // ignore fpu
1022 
1023         if let Some(pci_root) = pci {
1024             io_bus.insert(pci_root, 0xcf8, 0x8).unwrap();
1025         } else {
1026             // ignore pci.
1027             io_bus.insert(nul_device, 0xcf8, 0x8).unwrap();
1028         }
1029 
1030         Ok(io_bus)
1031     }
1032 
1033     /// Sets up the acpi devices for this platform and
1034     /// return the resources which is used to set the ACPI tables.
1035     ///
1036     /// # Arguments
1037     ///
1038     /// * - `io_bus` the I/O bus to add the devices to
1039     /// * - `resources` the SystemAllocator to allocate IO and MMIO for acpi
1040     ///                devices.
1041     /// * - `suspend_evt` the event object which used to suspend the vm
1042     /// * - `sdts` ACPI system description tables
1043     /// * - `irq_chip` the IrqChip object for registering irq events
1044     /// * - `battery` indicate whether to create the battery
1045     /// * - `mmio_bus` the MMIO bus to add the devices to
setup_acpi_devices( io_bus: &mut devices::Bus, resources: &mut SystemAllocator, suspend_evt: Event, exit_evt: Event, sdts: Vec<SDT>, irq_chip: &mut impl IrqChip, battery: (&Option<BatteryType>, Option<Minijail>), mmio_bus: &mut devices::Bus, ) -> Result<(acpi::ACPIDevResource, Option<BatControl>)>1046     fn setup_acpi_devices(
1047         io_bus: &mut devices::Bus,
1048         resources: &mut SystemAllocator,
1049         suspend_evt: Event,
1050         exit_evt: Event,
1051         sdts: Vec<SDT>,
1052         irq_chip: &mut impl IrqChip,
1053         battery: (&Option<BatteryType>, Option<Minijail>),
1054         mmio_bus: &mut devices::Bus,
1055     ) -> Result<(acpi::ACPIDevResource, Option<BatControl>)> {
1056         // The AML data for the acpi devices
1057         let mut amls = Vec::new();
1058 
1059         let pm_alloc = resources.get_anon_alloc();
1060         let pm_iobase = match resources.io_allocator() {
1061             Some(io) => io
1062                 .allocate_with_align(
1063                     devices::acpi::ACPIPM_RESOURCE_LEN as u64,
1064                     pm_alloc,
1065                     "ACPIPM".to_string(),
1066                     devices::acpi::ACPIPM_RESOURCE_LEN as u64,
1067                 )
1068                 .map_err(Error::AllocateIOResouce)?,
1069             None => 0x600,
1070         };
1071 
1072         let pmresource = devices::ACPIPMResource::new(suspend_evt, exit_evt);
1073         Aml::to_aml_bytes(&pmresource, &mut amls);
1074         let pm = Arc::new(Mutex::new(pmresource));
1075         io_bus
1076             .insert(
1077                 pm.clone(),
1078                 pm_iobase as u64,
1079                 devices::acpi::ACPIPM_RESOURCE_LEN as u64,
1080             )
1081             .unwrap();
1082         io_bus.notify_on_resume(pm);
1083 
1084         let bat_control = if let Some(battery_type) = battery.0 {
1085             match battery_type {
1086                 BatteryType::Goldfish => {
1087                     let control_tube = arch::add_goldfish_battery(
1088                         &mut amls,
1089                         battery.1,
1090                         mmio_bus,
1091                         irq_chip,
1092                         X86_64_SCI_IRQ,
1093                         resources,
1094                     )
1095                     .map_err(Error::CreateBatDevices)?;
1096                     Some(BatControl {
1097                         type_: BatteryType::Goldfish,
1098                         control_tube,
1099                     })
1100                 }
1101             }
1102         } else {
1103             None
1104         };
1105 
1106         Ok((
1107             acpi::ACPIDevResource {
1108                 amls,
1109                 pm_iobase,
1110                 sdts,
1111             },
1112             bat_control,
1113         ))
1114     }
1115 
1116     /// Sets up the serial devices for this platform. Returns the serial port number and serial
1117     /// device to be used for stdout
1118     ///
1119     /// # Arguments
1120     ///
1121     /// * - `irq_chip` the IrqChip object for registering irq events
1122     /// * - `io_bus` the I/O bus to add the devices to
1123     /// * - `serial_parmaters` - definitions for how the serial devices should be configured
setup_serial_devices( protected_vm: ProtectionType, irq_chip: &mut impl IrqChip, io_bus: &mut devices::Bus, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, ) -> Result<()>1124     fn setup_serial_devices(
1125         protected_vm: ProtectionType,
1126         irq_chip: &mut impl IrqChip,
1127         io_bus: &mut devices::Bus,
1128         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
1129         serial_jail: Option<Minijail>,
1130     ) -> Result<()> {
1131         let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?;
1132         let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?;
1133 
1134         arch::add_serial_devices(
1135             protected_vm,
1136             io_bus,
1137             &com_evt_1_3,
1138             &com_evt_2_4,
1139             &serial_parameters,
1140             serial_jail,
1141         )
1142         .map_err(Error::CreateSerialDevices)?;
1143 
1144         irq_chip
1145             .register_irq_event(X86_64_SERIAL_1_3_IRQ, &com_evt_1_3, None)
1146             .map_err(Error::RegisterIrqfd)?;
1147         irq_chip
1148             .register_irq_event(X86_64_SERIAL_2_4_IRQ, &com_evt_2_4, None)
1149             .map_err(Error::RegisterIrqfd)?;
1150 
1151         Ok(())
1152     }
1153 }
1154 
1155 #[cfg(test)]
1156 mod test_integration;
1157 
#[cfg(test)]
mod tests {
    use super::*;

    // Guest RAM size that stays below 4 GiB.
    const RAM_LT_4GB: u64 = 1 << 29;
    // Guest RAM size that extends past 4 GiB.
    const RAM_GT_4GB: u64 = (1 << 32) + 0x8000;
    // 3328 MiB, which is exactly 4 GiB minus the size of the gap (768 MiB).
    const RAM_EQ_4GB: u64 = 3328 << 20;
    // Size of the bios image used by the `*_bios` tests.
    const BIOS_LEN: u64 = 1 << 20;
    // Start of the memory above 4 GiB.
    const FOUR_GB: u64 = 1 << 32;

    #[test]
    fn regions_lt_4gb_nobios() {
        let regions = arch_memory_regions(RAM_LT_4GB, /* bios_size */ None);
        assert_eq!(1, regions.len());
        let ram = &regions[0];
        assert_eq!(GuestAddress(0), ram.0);
        assert_eq!(RAM_LT_4GB, ram.1);
    }

    #[test]
    fn regions_gt_4gb_nobios() {
        let regions = arch_memory_regions(RAM_GT_4GB, /* bios_size */ None);
        assert_eq!(2, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(FOUR_GB), regions[1].0);
    }

    #[test]
    fn regions_lt_4gb_bios() {
        let regions = arch_memory_regions(RAM_LT_4GB, Some(BIOS_LEN));
        assert_eq!(2, regions.len());
        let (ram, bios) = (&regions[0], &regions[1]);
        assert_eq!(GuestAddress(0), ram.0);
        assert_eq!(RAM_LT_4GB, ram.1);
        // The bios region ends right below 4 GiB.
        assert_eq!(GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_LEN), bios.0);
        assert_eq!(BIOS_LEN, bios.1);
    }

    #[test]
    fn regions_gt_4gb_bios() {
        let regions = arch_memory_regions(RAM_GT_4GB, Some(BIOS_LEN));
        assert_eq!(3, regions.len());
        let (low_ram, bios, high_ram) = (&regions[0], &regions[1], &regions[2]);
        assert_eq!(GuestAddress(0), low_ram.0);
        // The bios region ends right below 4 GiB.
        assert_eq!(GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_LEN), bios.0);
        assert_eq!(BIOS_LEN, bios.1);
        assert_eq!(GuestAddress(FOUR_GB), high_ram.0);
    }

    #[test]
    fn regions_eq_4gb_nobios() {
        let regions = arch_memory_regions(RAM_EQ_4GB, /* bios_size */ None);
        assert_eq!(1, regions.len());
        let ram = &regions[0];
        assert_eq!(GuestAddress(0), ram.0);
        assert_eq!(RAM_EQ_4GB, ram.1);
    }

    #[test]
    fn regions_eq_4gb_bios() {
        let regions = arch_memory_regions(RAM_EQ_4GB, Some(BIOS_LEN));
        assert_eq!(2, regions.len());
        let (ram, bios) = (&regions[0], &regions[1]);
        assert_eq!(GuestAddress(0), ram.0);
        assert_eq!(RAM_EQ_4GB, ram.1);
        // The bios region ends right below 4 GiB.
        assert_eq!(GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_LEN), bios.0);
        assert_eq!(BIOS_LEN, bios.1);
    }
}
1230