1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
mod fdt;

// Memory type written into e820 entries by `configure_system` (via `add_e820_entry`).
const E820_RAM: u32 = 1;
// NOTE(review): presumably the `setup_data` type tag for a device tree blob; it is not used in
// the visible part of this file - confirm at the use site.
const SETUP_DTB: u32 = 2;
// NOTE(review): presumably the size limit (0x200000 bytes) for the generated FDT; it is not used
// in the visible part of this file - confirm at the use site.
const X86_64_FDT_MAX_SIZE: u64 = 0x200000;

// NOTE(review): the naming/dead-code lint allowances suggest `bootparam` holds generated
// bindings - confirm before tightening them.
#[allow(dead_code)]
#[allow(non_upper_case_globals)]
#[allow(non_camel_case_types)]
#[allow(non_snake_case)]
mod bootparam;

// boot_params is just a series of ints, so it is safe to initialize it from arbitrary data.
unsafe impl data_model::DataInit for bootparam::boot_params {}

#[allow(dead_code)]
#[allow(non_upper_case_globals)]
mod msr_index;

#[allow(dead_code)]
#[allow(non_upper_case_globals)]
#[allow(non_camel_case_types)]
#[allow(clippy::all)]
mod mpspec;
// These mpspec types are only data, so reading them from data is a safe initialization.
unsafe impl data_model::DataInit for mpspec::mpc_bus {}
unsafe impl data_model::DataInit for mpspec::mpc_cpu {}
unsafe impl data_model::DataInit for mpspec::mpc_intsrc {}
unsafe impl data_model::DataInit for mpspec::mpc_ioapic {}
unsafe impl data_model::DataInit for mpspec::mpc_table {}
unsafe impl data_model::DataInit for mpspec::mpc_lintsrc {}
unsafe impl data_model::DataInit for mpspec::mpf_intel {}

mod acpi;
mod bzimage;
mod cpuid;
mod gdt;
mod interrupts;
mod mptable;
mod regs;
mod smbios;
46
47 use std::collections::BTreeMap;
48 use std::error::Error as StdError;
49 use std::ffi::{CStr, CString};
50 use std::fmt::{self, Display};
51 use std::fs::File;
52 use std::io::{self, Seek};
53 use std::mem;
54 use std::sync::Arc;
55
56 use crate::bootparam::boot_params;
57 use acpi_tables::aml::Aml;
58 use acpi_tables::sdt::SDT;
59 use arch::{
60 get_serial_cmdline, GetSerialCmdlineError, RunnableLinuxVm, SerialHardware, SerialParameters,
61 VmComponents, VmImage,
62 };
63 use base::Event;
64 use devices::{IrqChip, IrqChipX86_64, PciConfigIo, PciDevice, ProtectionType};
65 use hypervisor::{HypervisorX86_64, VcpuX86_64, VmX86_64};
66 use minijail::Minijail;
67 use remain::sorted;
68 use resources::SystemAllocator;
69 use sync::Mutex;
70 use vm_control::{BatControl, BatteryType};
71 use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};
72 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
73 use {
74 gdbstub::arch::x86::reg::X86_64CoreRegs,
75 hypervisor::x86_64::{Regs, Sregs},
76 };
77
/// Errors returned while building, configuring, or debugging an x86_64 VM.
///
/// Variants that wrap another error carry the underlying cause. The variant order is checked by
/// `#[sorted]`, so new variants must be inserted in sorted position.
#[sorted]
#[derive(Debug)]
pub enum Error {
    // NOTE(review): "Resouce" is a misspelling of "Resource"; renaming it would change the
    // public API, so it is left as is.
    AllocateIOResouce(resources::Error),
    AllocateIrq,
    CloneEvent(base::Error),
    Cmdline(kernel_cmdline::Error),
    ConfigureSystem,
    CreateBatDevices(arch::DeviceRegistrationError),
    CreateDevices(Box<dyn StdError>),
    CreateEvent(base::Error),
    CreateFdt(arch::fdt::Error),
    CreateIoapicDevice(base::Error),
    CreateIrqChip(Box<dyn StdError>),
    CreatePciRoot(arch::DeviceRegistrationError),
    CreatePit(base::Error),
    CreatePitDevice(devices::PitError),
    CreateSerialDevices(arch::DeviceRegistrationError),
    CreateSocket(io::Error),
    CreateVcpu(base::Error),
    CreateVm(Box<dyn StdError>),
    E820Configuration,
    EnableSinglestep(base::Error),
    EnableSplitIrqchip(base::Error),
    GetSerialCmdline(GetSerialCmdlineError),
    KernelOffsetPastEnd,
    LoadBios(io::Error),
    LoadBzImage(bzimage::Error),
    LoadCmdline(kernel_loader::Error),
    LoadInitrd(arch::LoadImageError),
    LoadKernel(kernel_loader::Error),
    PageNotPresent,
    Pstore(arch::pstore::Error),
    ReadingGuestMemory(vm_memory::GuestMemoryError),
    ReadRegs(base::Error),
    RegisterIrqfd(base::Error),
    RegisterVsock(arch::DeviceRegistrationError),
    SetHwBreakpoint(base::Error),
    SetLint(interrupts::Error),
    SetTssAddr(base::Error),
    SetupCpuid(cpuid::Error),
    SetupFpu(regs::Error),
    SetupGuestMemory(GuestMemoryError),
    SetupMptable(mptable::Error),
    SetupMsrs(regs::Error),
    SetupRegs(regs::Error),
    SetupSmbios(smbios::Error),
    SetupSregs(regs::Error),
    TranslatingVirtAddr,
    UnsupportedProtectionType,
    WriteRegs(base::Error),
    WritingGuestMemory(GuestMemoryError),
    ZeroPagePastRamEnd,
    ZeroPageSetup,
}
133
impl Display for Error {
    /// Writes a human-readable description of the error to `f`.
    ///
    /// Variants that wrap an underlying error append that error's own `Display` output to the
    /// message. The arms are kept in sorted order, which `#[remain::check]` / `#[sorted]` verify.
    #[remain::check]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;

        #[sorted]
        match self {
            AllocateIOResouce(e) => write!(f, "error allocating IO resource: {}", e),
            AllocateIrq => write!(f, "error allocating a single irq"),
            CloneEvent(e) => write!(f, "unable to clone an Event: {}", e),
            Cmdline(e) => write!(f, "the given kernel command line was invalid: {}", e),
            ConfigureSystem => write!(f, "error configuring the system"),
            CreateBatDevices(e) => write!(f, "unable to create battery devices: {}", e),
            CreateDevices(e) => write!(f, "error creating devices: {}", e),
            CreateEvent(e) => write!(f, "unable to make an Event: {}", e),
            CreateFdt(e) => write!(f, "failed to create fdt: {}", e),
            CreateIoapicDevice(e) => write!(f, "failed to create IOAPIC device: {}", e),
            CreateIrqChip(e) => write!(f, "failed to create IRQ chip: {}", e),
            CreatePciRoot(e) => write!(f, "failed to create a PCI root hub: {}", e),
            CreatePit(e) => write!(f, "unable to create PIT: {}", e),
            CreatePitDevice(e) => write!(f, "unable to make PIT device: {}", e),
            CreateSerialDevices(e) => write!(f, "unable to create serial devices: {}", e),
            CreateSocket(e) => write!(f, "failed to create socket: {}", e),
            CreateVcpu(e) => write!(f, "failed to create VCPU: {}", e),
            CreateVm(e) => write!(f, "failed to create VM: {}", e),
            E820Configuration => write!(f, "invalid e820 setup params"),
            EnableSinglestep(e) => write!(f, "failed to enable singlestep execution: {}", e),
            EnableSplitIrqchip(e) => write!(f, "failed to enable split irqchip: {}", e),
            GetSerialCmdline(e) => write!(f, "failed to get serial cmdline: {}", e),
            KernelOffsetPastEnd => write!(f, "the kernel extends past the end of RAM"),
            LoadBios(e) => write!(f, "error loading bios: {}", e),
            LoadBzImage(e) => write!(f, "error loading kernel bzImage: {}", e),
            LoadCmdline(e) => write!(f, "error loading command line: {}", e),
            LoadInitrd(e) => write!(f, "error loading initrd: {}", e),
            LoadKernel(e) => write!(f, "error loading Kernel: {}", e),
            PageNotPresent => write!(f, "error translating address: Page not present"),
            Pstore(e) => write!(f, "failed to allocate pstore region: {}", e),
            ReadingGuestMemory(e) => write!(f, "error reading guest memory {}", e),
            ReadRegs(e) => write!(f, "error reading CPU registers {}", e),
            RegisterIrqfd(e) => write!(f, "error registering an IrqFd: {}", e),
            RegisterVsock(e) => write!(f, "error registering virtual socket device: {}", e),
            SetHwBreakpoint(e) => write!(f, "failed to set a hardware breakpoint: {}", e),
            SetLint(e) => write!(f, "failed to set interrupts: {}", e),
            SetTssAddr(e) => write!(f, "failed to set tss addr: {}", e),
            SetupCpuid(e) => write!(f, "failed to set up cpuid: {}", e),
            SetupFpu(e) => write!(f, "failed to set up FPU: {}", e),
            SetupGuestMemory(e) => write!(f, "failed to set up guest memory: {}", e),
            SetupMptable(e) => write!(f, "failed to set up mptable: {}", e),
            SetupMsrs(e) => write!(f, "failed to set up MSRs: {}", e),
            SetupRegs(e) => write!(f, "failed to set up registers: {}", e),
            SetupSmbios(e) => write!(f, "failed to set up SMBIOS: {}", e),
            SetupSregs(e) => write!(f, "failed to set up sregs: {}", e),
            TranslatingVirtAddr => write!(f, "failed to translate virtual address"),
            UnsupportedProtectionType => write!(f, "protected VMs not supported on x86_64"),
            WriteRegs(e) => write!(f, "error writing CPU registers {}", e),
            WritingGuestMemory(e) => write!(f, "error writing guest memory {}", e),
            ZeroPagePastRamEnd => write!(f, "the zero page extends past the end of guest_mem"),
            ZeroPageSetup => write!(f, "error writing the zero page of guest memory"),
        }
    }
}
195
/// Result type used throughout this crate, with [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;

// No source chaining is implemented; the default `std::error::Error` methods are used.
impl std::error::Error for Error {}

/// Marker type that carries the x86_64 implementation of `arch::LinuxArch`.
pub struct X8664arch;
201
// Stack pointer value that `configure_vcpu` passes to `regs::setup_regs`.
const BOOT_STACK_POINTER: u64 = 0x8000;
// Size of the hole left below the 4 GiB boundary. Keep it a multiple of 256 MB for MTRR
// convenience.
const MEM_32BIT_GAP_SIZE: u64 = 768 << 20;
// First address that no longer fits in 32 bits (4 GiB).
const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32;
// Start of the hole below 4 GiB; low guest RAM never extends past this address.
const END_ADDR_BEFORE_32BITS: u64 = FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE;
// NOTE(review): not used in the visible part of this file; presumably the part of the gap
// handed out for MMIO - confirm at the use site.
const MMIO_SIZE: u64 = MEM_32BIT_GAP_SIZE - 0x8000000;
// Offset from the kernel load address of the address that `configure_vcpu` hands to
// `regs::setup_regs`.
const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
// Guest physical address at which `configure_system` writes the `boot_params` structure.
const ZERO_PAGE_OFFSET: u64 = 0x7000;
// Address passed to `set_tss_addr` when the VM is built.
const TSS_ADDR: u64 = 0xfffbd000;

// Guest physical address at which the kernel image is loaded.
const KERNEL_START_OFFSET: u64 = 0x200000;
// Guest physical address at which the kernel command line is loaded.
const CMDLINE_OFFSET: u64 = 0x20000;
// Space between the command line and the start of the kernel.
const CMDLINE_MAX_SIZE: u64 = KERNEL_START_OFFSET - CMDLINE_OFFSET;
const X86_64_SERIAL_1_3_IRQ: u32 = 4;
const X86_64_SERIAL_2_4_IRQ: u32 = 3;
// X86_64_SCI_IRQ is used to fill the ACPI FACP table.
// The SCI IRQ should be a legacy IRQ number below 16 (most platforms use the
// fixed IRQ number 9), so IRQ 5 is reserved for SCI and the other devices
// are allocated IRQ numbers after it.
pub const X86_64_SCI_IRQ: u32 = 5;
// The CMOS RTC uses IRQ 8; start allocating IRQs at 9.
pub const X86_64_IRQ_BASE: u32 = 9;
// NOTE(review): not used in the visible part of this file; presumably the base of the
// high-memory window searched for the ACPI RSDP - confirm at the use site.
const ACPI_HI_RSDP_WINDOW_BASE: u64 = 0x000E0000;
227
228 /// The x86 reset vector for i386+ and x86_64 puts the processor into an "unreal mode" where it
229 /// can access the last 1 MB of the 32-bit address space in 16-bit mode, and starts the instruction
230 /// pointer at the effective physical address 0xFFFFFFF0.
bios_start(bios_size: u64) -> GuestAddress231 fn bios_start(bios_size: u64) -> GuestAddress {
232 GuestAddress(FIRST_ADDR_PAST_32BITS - bios_size)
233 }
234
configure_system( guest_mem: &GuestMemory, _mem_size: u64, kernel_addr: GuestAddress, cmdline_addr: GuestAddress, cmdline_size: usize, setup_data: Option<GuestAddress>, initrd: Option<(GuestAddress, usize)>, mut params: boot_params, ) -> Result<()>235 fn configure_system(
236 guest_mem: &GuestMemory,
237 _mem_size: u64,
238 kernel_addr: GuestAddress,
239 cmdline_addr: GuestAddress,
240 cmdline_size: usize,
241 setup_data: Option<GuestAddress>,
242 initrd: Option<(GuestAddress, usize)>,
243 mut params: boot_params,
244 ) -> Result<()> {
245 const EBDA_START: u64 = 0x0009fc00;
246 const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
247 const KERNEL_HDR_MAGIC: u32 = 0x53726448;
248 const KERNEL_LOADER_OTHER: u8 = 0xff;
249 const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x1000000; // Must be non-zero.
250 let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS);
251 let end_32bit_gap_start = GuestAddress(END_ADDR_BEFORE_32BITS);
252
253 params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
254 params.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
255 params.hdr.header = KERNEL_HDR_MAGIC;
256 params.hdr.cmd_line_ptr = cmdline_addr.offset() as u32;
257 params.hdr.cmdline_size = cmdline_size as u32;
258 params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
259 if let Some(setup_data) = setup_data {
260 params.hdr.setup_data = setup_data.offset();
261 }
262 if let Some((initrd_addr, initrd_size)) = initrd {
263 params.hdr.ramdisk_image = initrd_addr.offset() as u32;
264 params.hdr.ramdisk_size = initrd_size as u32;
265 }
266
267 add_e820_entry(&mut params, 0, EBDA_START, E820_RAM)?;
268
269 let mem_end = guest_mem.end_addr();
270 if mem_end < end_32bit_gap_start {
271 add_e820_entry(
272 &mut params,
273 kernel_addr.offset() as u64,
274 mem_end.offset_from(kernel_addr) as u64,
275 E820_RAM,
276 )?;
277 } else {
278 add_e820_entry(
279 &mut params,
280 kernel_addr.offset() as u64,
281 end_32bit_gap_start.offset_from(kernel_addr) as u64,
282 E820_RAM,
283 )?;
284 if mem_end > first_addr_past_32bits {
285 add_e820_entry(
286 &mut params,
287 first_addr_past_32bits.offset() as u64,
288 mem_end.offset_from(first_addr_past_32bits) as u64,
289 E820_RAM,
290 )?;
291 }
292 }
293
294 let zero_page_addr = GuestAddress(ZERO_PAGE_OFFSET);
295 guest_mem
296 .checked_offset(zero_page_addr, mem::size_of::<boot_params>() as u64)
297 .ok_or(Error::ZeroPagePastRamEnd)?;
298 guest_mem
299 .write_obj_at_addr(params, zero_page_addr)
300 .map_err(|_| Error::ZeroPageSetup)?;
301
302 Ok(())
303 }
304
305 /// Add an e820 region to the e820 map.
306 /// Returns Ok(()) if successful, or an error if there is no space left in the map.
add_e820_entry(params: &mut boot_params, addr: u64, size: u64, mem_type: u32) -> Result<()>307 fn add_e820_entry(params: &mut boot_params, addr: u64, size: u64, mem_type: u32) -> Result<()> {
308 if params.e820_entries >= params.e820_table.len() as u8 {
309 return Err(Error::E820Configuration);
310 }
311
312 params.e820_table[params.e820_entries as usize].addr = addr;
313 params.e820_table[params.e820_entries as usize].size = size;
314 params.e820_table[params.e820_entries as usize].type_ = mem_type;
315 params.e820_entries += 1;
316
317 Ok(())
318 }
319
320 /// Returns a Vec of the valid memory addresses.
321 /// These should be used to configure the GuestMemory structure for the platform.
322 /// For x86_64 all addresses are valid from the start of the kernel except a
323 /// carve out at the end of 32bit address space.
arch_memory_regions(size: u64, bios_size: Option<u64>) -> Vec<(GuestAddress, u64)>324 fn arch_memory_regions(size: u64, bios_size: Option<u64>) -> Vec<(GuestAddress, u64)> {
325 let mem_end = GuestAddress(size);
326 let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS);
327 let end_32bit_gap_start = GuestAddress(END_ADDR_BEFORE_32BITS);
328
329 let mut regions = Vec::new();
330 if mem_end <= end_32bit_gap_start {
331 regions.push((GuestAddress(0), size));
332 if let Some(bios_size) = bios_size {
333 regions.push((bios_start(bios_size), bios_size));
334 }
335 } else {
336 regions.push((GuestAddress(0), end_32bit_gap_start.offset()));
337 if let Some(bios_size) = bios_size {
338 regions.push((bios_start(bios_size), bios_size));
339 }
340 regions.push((
341 first_addr_past_32bits,
342 mem_end.offset_from(end_32bit_gap_start),
343 ));
344 }
345
346 regions
347 }
348
349 impl arch::LinuxArch for X8664arch {
350 type Error = Error;
351
guest_memory_layout( components: &VmComponents, ) -> std::result::Result<Vec<(GuestAddress, u64)>, Self::Error>352 fn guest_memory_layout(
353 components: &VmComponents,
354 ) -> std::result::Result<Vec<(GuestAddress, u64)>, Self::Error> {
355 let bios_size = match &components.vm_image {
356 VmImage::Bios(bios_file) => Some(bios_file.metadata().map_err(Error::LoadBios)?.len()),
357 VmImage::Kernel(_) => None,
358 };
359 Ok(arch_memory_regions(components.memory_size, bios_size))
360 }
361
build_vm<V, Vcpu, I, FD, FI, E1, E2>( mut components: VmComponents, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, battery: (&Option<BatteryType>, Option<Minijail>), mut vm: V, create_devices: FD, create_irq_chip: FI, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu, I>, Self::Error> where V: VmX86_64, Vcpu: VcpuX86_64, I: IrqChipX86_64, FD: FnOnce( &GuestMemory, &mut V, &mut SystemAllocator, &Event, ) -> std::result::Result<Vec<(Box<dyn PciDevice>, Option<Minijail>)>, E1>, FI: FnOnce(&V, usize) -> std::result::Result<I, E2>, E1: StdError + 'static, E2: StdError + 'static,362 fn build_vm<V, Vcpu, I, FD, FI, E1, E2>(
363 mut components: VmComponents,
364 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
365 serial_jail: Option<Minijail>,
366 battery: (&Option<BatteryType>, Option<Minijail>),
367 mut vm: V,
368 create_devices: FD,
369 create_irq_chip: FI,
370 ) -> std::result::Result<RunnableLinuxVm<V, Vcpu, I>, Self::Error>
371 where
372 V: VmX86_64,
373 Vcpu: VcpuX86_64,
374 I: IrqChipX86_64,
375 FD: FnOnce(
376 &GuestMemory,
377 &mut V,
378 &mut SystemAllocator,
379 &Event,
380 ) -> std::result::Result<Vec<(Box<dyn PciDevice>, Option<Minijail>)>, E1>,
381 FI: FnOnce(&V, /* vcpu_count: */ usize) -> std::result::Result<I, E2>,
382 E1: StdError + 'static,
383 E2: StdError + 'static,
384 {
385 if components.protected_vm != ProtectionType::Unprotected {
386 return Err(Error::UnsupportedProtectionType);
387 }
388
389 let mem = vm.get_memory().clone();
390 let mut resources = Self::get_resource_allocator(&mem);
391
392 let vcpu_count = components.vcpu_count;
393 let mut irq_chip =
394 create_irq_chip(&vm, vcpu_count).map_err(|e| Error::CreateIrqChip(Box::new(e)))?;
395
396 let tss_addr = GuestAddress(TSS_ADDR);
397 vm.set_tss_addr(tss_addr).map_err(Error::SetTssAddr)?;
398
399 let mut mmio_bus = devices::Bus::new();
400
401 let exit_evt = Event::new().map_err(Error::CreateEvent)?;
402
403 let pci_devices = create_devices(&mem, &mut vm, &mut resources, &exit_evt)
404 .map_err(|e| Error::CreateDevices(Box::new(e)))?;
405 let (pci, pci_irqs, pid_debug_label_map) = arch::generate_pci_root(
406 pci_devices,
407 &mut irq_chip,
408 &mut mmio_bus,
409 &mut resources,
410 &mut vm,
411 4, // Share the four pin interrupts (INTx#)
412 )
413 .map_err(Error::CreatePciRoot)?;
414 let pci_bus = Arc::new(Mutex::new(PciConfigIo::new(pci)));
415
416 // Event used to notify crosvm that guest OS is trying to suspend.
417 let suspend_evt = Event::new().map_err(Error::CreateEvent)?;
418
419 let mut io_bus = Self::setup_io_bus(
420 irq_chip.pit_uses_speaker_port(),
421 exit_evt.try_clone().map_err(Error::CloneEvent)?,
422 Some(pci_bus),
423 components.memory_size,
424 )?;
425
426 Self::setup_serial_devices(
427 components.protected_vm,
428 &mut irq_chip,
429 &mut io_bus,
430 serial_parameters,
431 serial_jail,
432 )?;
433
434 let (acpi_dev_resource, bat_control) = Self::setup_acpi_devices(
435 &mut io_bus,
436 &mut resources,
437 suspend_evt.try_clone().map_err(Error::CloneEvent)?,
438 exit_evt.try_clone().map_err(Error::CloneEvent)?,
439 components.acpi_sdts,
440 &mut irq_chip,
441 battery,
442 &mut mmio_bus,
443 )?;
444
445 let ramoops_region = match components.pstore {
446 Some(pstore) => Some(
447 arch::pstore::create_memory_region(&mut vm, &mut resources, &pstore)
448 .map_err(Error::Pstore)?,
449 ),
450 None => None,
451 };
452
453 irq_chip
454 .finalize_devices(&mut resources, &mut io_bus, &mut mmio_bus)
455 .map_err(Error::RegisterIrqfd)?;
456
457 // All of these bios generated tables are set manually for the benefit of the kernel boot
458 // flow (since there's no BIOS to set it) and for the BIOS boot flow since crosvm doesn't
459 // have a way to pass the BIOS these configs.
460 // This works right now because the only guest BIOS used with crosvm (u-boot) ignores these
461 // tables and the guest OS picks them up.
462 // If another guest does need a way to pass these tables down to it's BIOS, this approach
463 // should be rethought.
464
465 // Note that this puts the mptable at 0x9FC00 in guest physical memory.
466 mptable::setup_mptable(&mem, vcpu_count as u8, pci_irqs).map_err(Error::SetupMptable)?;
467 smbios::setup_smbios(&mem, components.dmi_path).map_err(Error::SetupSmbios)?;
468
469 // TODO (tjeznach) Write RSDP to bootconfig before writing to memory
470 acpi::create_acpi_tables(&mem, vcpu_count as u8, X86_64_SCI_IRQ, acpi_dev_resource);
471
472 match components.vm_image {
473 VmImage::Bios(ref mut bios) => Self::load_bios(&mem, bios)?,
474 VmImage::Kernel(ref mut kernel_image) => {
475 let mut cmdline = Self::get_base_linux_cmdline();
476
477 get_serial_cmdline(&mut cmdline, serial_parameters, "io")
478 .map_err(Error::GetSerialCmdline)?;
479
480 for param in components.extra_kernel_params {
481 cmdline.insert_str(¶m).map_err(Error::Cmdline)?;
482 }
483 // It seems that default record_size is only 4096 byte even if crosvm allocates
484 // more memory. It means that one crash can only 4096 byte.
485 // Set record_size and console_size to 1/4 of allocated memory size.
486 // This configulation is same as the host.
487 if let Some(ramoops_region) = ramoops_region {
488 let ramoops_opts = [
489 ("mem_address", ramoops_region.address),
490 ("mem_size", ramoops_region.size as u64),
491 ("console_size", (ramoops_region.size / 4) as u64),
492 ("record_size", (ramoops_region.size / 4) as u64),
493 ("dump_oops", 1_u64),
494 ];
495 for (name, val) in &ramoops_opts {
496 cmdline
497 .insert_str(format!("ramoops.{}={:#x}", name, val))
498 .map_err(Error::Cmdline)?;
499 }
500 }
501
502 // separate out load_kernel from other setup to get a specific error for
503 // kernel loading
504 let (params, kernel_end) = Self::load_kernel(&mem, kernel_image)?;
505
506 Self::setup_system_memory(
507 &mem,
508 components.memory_size,
509 &CString::new(cmdline).unwrap(),
510 components.initrd_image,
511 components.android_fstab,
512 kernel_end,
513 params,
514 )?;
515 }
516 }
517
518 Ok(RunnableLinuxVm {
519 vm,
520 resources,
521 exit_evt,
522 vcpu_count,
523 vcpus: None,
524 vcpu_affinity: components.vcpu_affinity,
525 no_smt: components.no_smt,
526 irq_chip,
527 has_bios: matches!(components.vm_image, VmImage::Bios(_)),
528 io_bus,
529 mmio_bus,
530 pid_debug_label_map,
531 suspend_evt,
532 rt_cpus: components.rt_cpus,
533 bat_control,
534 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
535 gdb: components.gdb,
536 })
537 }
538
configure_vcpu( guest_mem: &GuestMemory, hypervisor: &dyn HypervisorX86_64, irq_chip: &mut dyn IrqChipX86_64, vcpu: &mut dyn VcpuX86_64, vcpu_id: usize, num_cpus: usize, has_bios: bool, no_smt: bool, ) -> Result<()>539 fn configure_vcpu(
540 guest_mem: &GuestMemory,
541 hypervisor: &dyn HypervisorX86_64,
542 irq_chip: &mut dyn IrqChipX86_64,
543 vcpu: &mut dyn VcpuX86_64,
544 vcpu_id: usize,
545 num_cpus: usize,
546 has_bios: bool,
547 no_smt: bool,
548 ) -> Result<()> {
549 cpuid::setup_cpuid(hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, no_smt)
550 .map_err(Error::SetupCpuid)?;
551
552 if has_bios {
553 return Ok(());
554 }
555
556 let kernel_load_addr = GuestAddress(KERNEL_START_OFFSET);
557 regs::setup_msrs(vcpu, END_ADDR_BEFORE_32BITS).map_err(Error::SetupMsrs)?;
558 let kernel_end = guest_mem
559 .checked_offset(kernel_load_addr, KERNEL_64BIT_ENTRY_OFFSET)
560 .ok_or(Error::KernelOffsetPastEnd)?;
561 regs::setup_regs(
562 vcpu,
563 (kernel_end).offset() as u64,
564 BOOT_STACK_POINTER as u64,
565 ZERO_PAGE_OFFSET as u64,
566 )
567 .map_err(Error::SetupRegs)?;
568 regs::setup_fpu(vcpu).map_err(Error::SetupFpu)?;
569 regs::setup_sregs(guest_mem, vcpu).map_err(Error::SetupSregs)?;
570 interrupts::set_lint(vcpu_id, irq_chip).map_err(Error::SetLint)?;
571
572 Ok(())
573 }
574
/// Reads the vCPU's core registers in the layout GDB expects.
///
/// Only the general purpose registers, `rip`, the low 32 bits of `rflags` and the segment
/// selectors are filled in; every other field keeps its default value.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn debug_read_registers<T: VcpuX86_64>(vcpu: &T) -> Result<X86_64CoreRegs> {
    let gregs = vcpu.get_regs().map_err(Error::ReadRegs)?;
    let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;

    // Segment registers in GDB order: CS, SS, DS, ES, FS, GS. GDB uses only the selectors.
    let segment_regs = [sregs.cs, sregs.ss, sregs.ds, sregs.es, sregs.fs, sregs.gs];
    let mut segments = [0u32; 6];
    for (slot, segment) in segments.iter_mut().zip(segment_regs.iter()) {
        *slot = segment.selector as u32;
    }

    // TODO(keiichiw): Other registers such as FPU, xmm and mxcsr.

    Ok(X86_64CoreRegs {
        // General registers in GDB order: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15.
        regs: [
            gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rbp, gregs.rsp,
            gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14, gregs.r15,
        ],
        // GDB exposes 32-bit eflags instead of 64-bit rflags.
        // https://github.com/bminor/binutils-gdb/blob/master/gdb/features/i386/64bit-core.xml
        eflags: gregs.rflags as u32,
        rip: gregs.rip,
        segments,
        ..Default::default()
    })
}
608
/// Writes the GDB-provided core registers `regs` back to the vCPU.
///
/// The general purpose registers, `rip` and the low 32 bits of `rflags` are replaced; the
/// upper 32 bits of `rflags` keep their current value. For the segment registers only the
/// selectors are updated, all other segment state is preserved.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn debug_write_registers<T: VcpuX86_64>(vcpu: &T, regs: &X86_64CoreRegs) -> Result<()> {
    // GDB only knows the 32-bit eflags, so merge them into the current 64-bit rflags.
    let current = vcpu.get_regs().map_err(Error::ReadRegs)?;
    let upper_rflags = current.rflags & !(u32::MAX as u64);
    let rflags = upper_rflags | (regs.eflags as u64);

    // General purpose registers arrive in GDB order:
    // RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15.
    let gp = &regs.regs;
    let new_regs = Regs {
        rax: gp[0],
        rbx: gp[1],
        rcx: gp[2],
        rdx: gp[3],
        rsi: gp[4],
        rdi: gp[5],
        rbp: gp[6],
        rsp: gp[7],
        r8: gp[8],
        r9: gp[9],
        r10: gp[10],
        r11: gp[11],
        r12: gp[12],
        r13: gp[13],
        r14: gp[14],
        r15: gp[15],
        rip: regs.rip,
        rflags,
    };
    vcpu.set_regs(&new_regs).map_err(Error::WriteRegs)?;

    // Segment registers arrive in GDB order: CS, SS, DS, ES, FS, GS.
    // GDB only cares about the selectors, so start from the current sregs.
    let selectors = &regs.segments;
    let mut sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
    sregs.cs.selector = selectors[0] as u16;
    sregs.ss.selector = selectors[1] as u16;
    sregs.ds.selector = selectors[2] as u16;
    sregs.es.selector = selectors[3] as u16;
    sregs.fs.selector = selectors[4] as u16;
    sregs.gs.selector = selectors[5] as u16;
    vcpu.set_sregs(&sregs).map_err(Error::WriteRegs)?;

    // TODO(keiichiw): Other registers such as FPU, xmm and mxcsr.

    Ok(())
}
652
/// Reads `len` bytes starting at guest virtual address `vaddr`.
///
/// The address is translated with the vCPU's current paging state. Translation is redone
/// whenever the read crosses into the next page, so the range may span several pages.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn debug_read_memory<T: VcpuX86_64>(
    vcpu: &T,
    guest_mem: &GuestMemory,
    vaddr: GuestAddress,
    len: usize,
) -> Result<Vec<u8>> {
    let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
    let wanted = len as u64;
    let mut buf = vec![0; len];
    let mut done = 0u64;

    while done < wanted {
        let (paddr, page_size) = phys_addr(guest_mem, vaddr.0 + done, &sregs)?;
        // Never read past the end of the page that `paddr` lies in.
        let left_in_page = page_size - (paddr & (page_size - 1));
        let chunk = std::cmp::min(wanted - done, left_in_page);

        let source = guest_mem
            .get_slice_at_addr(GuestAddress(paddr), chunk as usize)
            .map_err(Error::ReadingGuestMemory)?;
        source.copy_to(&mut buf[done as usize..]);

        done += chunk;
    }

    Ok(buf)
}
676
/// Writes `buf` to guest memory starting at guest virtual address `vaddr`.
///
/// The address is translated with the vCPU's current paging state. Translation is redone
/// whenever the write crosses into the next page, so the range may span several pages.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn debug_write_memory<T: VcpuX86_64>(
    vcpu: &T,
    guest_mem: &GuestMemory,
    vaddr: GuestAddress,
    buf: &[u8],
) -> Result<()> {
    let sregs = vcpu.get_sregs().map_err(Error::ReadRegs)?;
    let total = buf.len() as u64;
    let mut done = 0u64;

    while done < total {
        let (paddr, page_size) = phys_addr(guest_mem, vaddr.0 + done, &sregs)?;
        // Never write past the end of the page that `paddr` lies in.
        let left_in_page = page_size - (paddr & (page_size - 1));
        let chunk = std::cmp::min(total - done, left_in_page);

        let begin = done as usize;
        let end = begin + chunk as usize;
        guest_mem
            .write_all_at_addr(&buf[begin..end], GuestAddress(paddr))
            .map_err(Error::WritingGuestMemory)?;

        done += chunk;
    }

    Ok(())
}
704
/// Enables single-step execution on `vcpu`, passing an empty hardware breakpoint list.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn debug_enable_singlestep<T: VcpuX86_64>(vcpu: &T) -> Result<()> {
    let enable_singlestep = true;
    let result = vcpu.set_guest_debug(&[], enable_singlestep);
    result.map_err(Error::EnableSinglestep)
}
710
/// Installs `breakpoints` as the hardware breakpoints of `vcpu`, with single-stepping disabled.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn debug_set_hw_breakpoints<T: VcpuX86_64>(
    vcpu: &T,
    breakpoints: &[GuestAddress],
) -> Result<()> {
    let enable_singlestep = false;
    let result = vcpu.set_guest_debug(breakpoints, enable_singlestep);
    result.map_err(Error::SetHwBreakpoint)
}
719 }
720
/// Translates the guest virtual address `vaddr` using the paging state in `sregs`.
///
/// Returns the translated physical address and the size of the page it resides in.
///
/// * With paging disabled (`CR0.PG` clear) the address is returned unchanged with a 4 KiB
///   page size.
/// * Only long mode paging is supported: 4-level, or 5-level when `CR4.LA57` is set.
///   1 GiB and 2 MiB pages are recognised through the page-size bit of the entry.
///
/// # Errors
///
/// * `Error::TranslatingVirtAddr` if the paging mode is unsupported (PAE disabled or not in
///   long mode) or a page table entry cannot be read from guest memory.
/// * `Error::PageNotPresent` if an entry on the walk is not marked present.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn phys_addr(mem: &GuestMemory, vaddr: u64, sregs: &Sregs) -> Result<(u64, u64)> {
    const CR0_PG_MASK: u64 = 1 << 31;
    const CR4_PAE_MASK: u64 = 1 << 5;
    const CR4_LA57_MASK: u64 = 1 << 12;
    const MSR_EFER_LMA: u64 = 1 << 10;
    // bits 12 through 51 are the address in a PTE.
    const PTE_ADDR_MASK: u64 = ((1 << 52) - 1) & !0x0fff;
    const PAGE_PRESENT: u64 = 0x1;
    const PAGE_PSE_MASK: u64 = 0x1 << 7;

    const PAGE_SIZE_4K: u64 = 4 * 1024;
    const PAGE_SIZE_2M: u64 = 2 * 1024 * 1024;
    const PAGE_SIZE_1G: u64 = 1024 * 1024 * 1024;

    // Reads the entry for `vaddr` from the table of the given `level` located at
    // `curr_table_addr` and checks that it is present.
    // TODO - add a trace of the level, table address, entry and offset of each lookup.
    fn next_pte(mem: &GuestMemory, curr_table_addr: u64, vaddr: u64, level: usize) -> Result<u64> {
        let ent: u64 = mem
            .read_obj_from_addr(GuestAddress(
                (curr_table_addr & PTE_ADDR_MASK) + page_table_offset(vaddr, level),
            ))
            .map_err(|_| Error::TranslatingVirtAddr)?;
        if ent & PAGE_PRESENT == 0 {
            return Err(Error::PageNotPresent);
        }
        Ok(ent)
    }

    // Get the offset in to the page of `vaddr`.
    fn page_offset(vaddr: u64, page_size: u64) -> u64 {
        vaddr & (page_size - 1)
    }

    // Get the base address of the page of `page_size` bytes mapped by `ent`.
    // The frame address of a large page starts at the bit matching its size. Lower bits of
    // the address field are not part of the address (bit 12 is the PAT bit), so they are
    // masked off here.
    fn page_base(ent: u64, page_size: u64) -> u64 {
        ent & PTE_ADDR_MASK & !(page_size - 1)
    }

    // Get the offset in to the page table of the given `level` specified by the virtual `address`.
    // `level` is 1 through 5 in x86_64 to handle the five levels of paging.
    fn page_table_offset(addr: u64, level: usize) -> u64 {
        let offset = (level - 1) * 9 + 12;
        ((addr >> offset) & 0x1ff) << 3
    }

    if sregs.cr0 & CR0_PG_MASK == 0 {
        return Ok((vaddr, PAGE_SIZE_4K));
    }

    if sregs.cr4 & CR4_PAE_MASK == 0 {
        return Err(Error::TranslatingVirtAddr);
    }

    if sregs.efer & MSR_EFER_LMA == 0 {
        return Err(Error::TranslatingVirtAddr);
    }

    // With 5-level paging CR3 points at the level 5 table, whose entry locates the level 4
    // table. With 4-level paging CR3 points at the level 4 table directly.
    let p4_table = if sregs.cr4 & CR4_LA57_MASK != 0 {
        next_pte(mem, sregs.cr3, vaddr, 5)?
    } else {
        sregs.cr3
    };
    let p4_ent = next_pte(mem, p4_table, vaddr, 4)?;
    let p3_ent = next_pte(mem, p4_ent, vaddr, 3)?;
    if p3_ent & PAGE_PSE_MASK != 0 {
        // It's a 1G page with the PSE bit in p3_ent
        let paddr = page_base(p3_ent, PAGE_SIZE_1G) | page_offset(vaddr, PAGE_SIZE_1G);
        return Ok((paddr, PAGE_SIZE_1G));
    }
    let p2_ent = next_pte(mem, p3_ent, vaddr, 2)?;
    if p2_ent & PAGE_PSE_MASK != 0 {
        // It's a 2M page with the PSE bit in p2_ent
        let paddr = page_base(p2_ent, PAGE_SIZE_2M) | page_offset(vaddr, PAGE_SIZE_2M);
        return Ok((paddr, PAGE_SIZE_2M));
    }
    let p1_ent = next_pte(mem, p2_ent, vaddr, 1)?;
    let paddr = page_base(p1_ent, PAGE_SIZE_4K) | page_offset(vaddr, PAGE_SIZE_4K);
    Ok((paddr, PAGE_SIZE_4K))
}
803
804 impl X8664arch {
805 /// Loads the bios from an open file.
806 ///
807 /// # Arguments
808 ///
809 /// * `mem` - The memory to be used by the guest.
810 /// * `bios_image` - the File object for the specified bios
load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()>811 fn load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()> {
812 let bios_image_length = bios_image
813 .seek(io::SeekFrom::End(0))
814 .map_err(Error::LoadBios)?;
815 if bios_image_length >= FIRST_ADDR_PAST_32BITS {
816 return Err(Error::LoadBios(io::Error::new(
817 io::ErrorKind::InvalidData,
818 format!(
819 "bios was {} bytes, expected less than {}",
820 bios_image_length, FIRST_ADDR_PAST_32BITS,
821 ),
822 )));
823 }
824 bios_image
825 .seek(io::SeekFrom::Start(0))
826 .map_err(Error::LoadBios)?;
827 mem.read_to_memory(
828 bios_start(bios_image_length),
829 bios_image,
830 bios_image_length as usize,
831 )
832 .map_err(Error::SetupGuestMemory)?;
833 Ok(())
834 }
835
836 /// Loads the kernel from an open file.
837 ///
838 /// # Arguments
839 ///
840 /// * `mem` - The memory to be used by the guest.
841 /// * `kernel_image` - the File object for the specified kernel.
load_kernel(mem: &GuestMemory, kernel_image: &mut File) -> Result<(boot_params, u64)>842 fn load_kernel(mem: &GuestMemory, kernel_image: &mut File) -> Result<(boot_params, u64)> {
843 let elf_result =
844 kernel_loader::load_kernel(mem, GuestAddress(KERNEL_START_OFFSET), kernel_image);
845 if elf_result == Err(kernel_loader::Error::InvalidElfMagicNumber) {
846 bzimage::load_bzimage(mem, GuestAddress(KERNEL_START_OFFSET), kernel_image)
847 .map_err(Error::LoadBzImage)
848 } else {
849 let kernel_end = elf_result.map_err(Error::LoadKernel)?;
850 Ok((Default::default(), kernel_end))
851 }
852 }
853
854 /// Configures the system memory space should be called once per vm before
855 /// starting vcpu threads.
856 ///
857 /// # Arguments
858 ///
859 /// * `mem` - The memory to be used by the guest.
860 /// * `cmdline` - the kernel commandline
861 /// * `initrd_file` - an initial ramdisk image
setup_system_memory( mem: &GuestMemory, mem_size: u64, cmdline: &CStr, initrd_file: Option<File>, android_fstab: Option<File>, kernel_end: u64, params: boot_params, ) -> Result<()>862 fn setup_system_memory(
863 mem: &GuestMemory,
864 mem_size: u64,
865 cmdline: &CStr,
866 initrd_file: Option<File>,
867 android_fstab: Option<File>,
868 kernel_end: u64,
869 params: boot_params,
870 ) -> Result<()> {
871 kernel_loader::load_cmdline(mem, GuestAddress(CMDLINE_OFFSET), cmdline)
872 .map_err(Error::LoadCmdline)?;
873
874 // Track the first free address after the kernel - this is where extra
875 // data like the device tree blob and initrd will be loaded.
876 let mut free_addr = kernel_end;
877
878 let setup_data = if let Some(android_fstab) = android_fstab {
879 let free_addr_aligned = (((free_addr + 64 - 1) / 64) * 64) + 64;
880 let dtb_start = GuestAddress(free_addr_aligned);
881 let dtb_size = fdt::create_fdt(
882 X86_64_FDT_MAX_SIZE as usize,
883 mem,
884 dtb_start.offset(),
885 android_fstab,
886 )
887 .map_err(Error::CreateFdt)?;
888 free_addr = dtb_start.offset() + dtb_size as u64;
889 Some(dtb_start)
890 } else {
891 None
892 };
893
894 let initrd = match initrd_file {
895 Some(mut initrd_file) => {
896 let mut initrd_addr_max = u64::from(params.hdr.initrd_addr_max);
897 // Default initrd_addr_max for old kernels (see Documentation/x86/boot.txt).
898 if initrd_addr_max == 0 {
899 initrd_addr_max = 0x37FFFFFF;
900 }
901
902 let mem_max = mem.end_addr().offset() - 1;
903 if initrd_addr_max > mem_max {
904 initrd_addr_max = mem_max;
905 }
906
907 let (initrd_start, initrd_size) = arch::load_image_high(
908 mem,
909 &mut initrd_file,
910 GuestAddress(free_addr),
911 GuestAddress(initrd_addr_max),
912 base::pagesize() as u64,
913 )
914 .map_err(Error::LoadInitrd)?;
915 Some((initrd_start, initrd_size))
916 }
917 None => None,
918 };
919
920 configure_system(
921 mem,
922 mem_size,
923 GuestAddress(KERNEL_START_OFFSET),
924 GuestAddress(CMDLINE_OFFSET),
925 cmdline.to_bytes().len() + 1,
926 setup_data,
927 initrd,
928 params,
929 )?;
930 Ok(())
931 }
932
933 /// This returns the start address of high mmio
934 ///
935 /// # Arguments
936 ///
937 /// * mem: The memory to be used by the guest
get_high_mmio_base(mem: &GuestMemory) -> u64938 fn get_high_mmio_base(mem: &GuestMemory) -> u64 {
939 // Put device memory at a 2MB boundary after physical memory or 4gb, whichever is greater.
940 const MB: u64 = 1 << 20;
941 const GB: u64 = 1 << 30;
942 let ram_end_round_2mb = (mem.end_addr().offset() + 2 * MB - 1) / (2 * MB) * (2 * MB);
943 std::cmp::max(ram_end_round_2mb, 4 * GB)
944 }
945
946 /// This returns a minimal kernel command for this architecture
get_base_linux_cmdline() -> kernel_cmdline::Cmdline947 fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
948 let mut cmdline = kernel_cmdline::Cmdline::new(CMDLINE_MAX_SIZE as usize);
949 cmdline.insert_str("pci=noacpi reboot=k panic=-1").unwrap();
950
951 cmdline
952 }
953
954 /// Returns a system resource allocator.
get_resource_allocator(mem: &GuestMemory) -> SystemAllocator955 fn get_resource_allocator(mem: &GuestMemory) -> SystemAllocator {
956 let high_mmio_start = Self::get_high_mmio_base(mem);
957 SystemAllocator::builder()
958 .add_io_addresses(0xc000, 0x10000)
959 .add_low_mmio_addresses(END_ADDR_BEFORE_32BITS, MMIO_SIZE)
960 .add_high_mmio_addresses(high_mmio_start, u64::max_value() - high_mmio_start)
961 .create_allocator(X86_64_IRQ_BASE)
962 .unwrap()
963 }
964
965 /// Sets up the IO bus for this platform
966 ///
967 /// # Arguments
968 ///
969 /// * - `pit_uses_speaker_port` - does the PIT use port 0x61 for the PC speaker
970 /// * - `exit_evt` - the event object which should receive exit events
971 /// * - `mem_size` - the size in bytes of physical ram for the guest
setup_io_bus( pit_uses_speaker_port: bool, exit_evt: Event, pci: Option<Arc<Mutex<devices::PciConfigIo>>>, mem_size: u64, ) -> Result<devices::Bus>972 fn setup_io_bus(
973 pit_uses_speaker_port: bool,
974 exit_evt: Event,
975 pci: Option<Arc<Mutex<devices::PciConfigIo>>>,
976 mem_size: u64,
977 ) -> Result<devices::Bus> {
978 struct NoDevice;
979 impl devices::BusDevice for NoDevice {
980 fn debug_label(&self) -> String {
981 "no device".to_owned()
982 }
983 }
984
985 let mut io_bus = devices::Bus::new();
986
987 let mem_regions = arch_memory_regions(mem_size, None);
988
989 let mem_below_4g = mem_regions
990 .iter()
991 .filter(|r| r.0.offset() < FIRST_ADDR_PAST_32BITS)
992 .map(|r| r.1)
993 .sum();
994
995 let mem_above_4g = mem_regions
996 .iter()
997 .filter(|r| r.0.offset() >= FIRST_ADDR_PAST_32BITS)
998 .map(|r| r.1)
999 .sum();
1000
1001 io_bus
1002 .insert(
1003 Arc::new(Mutex::new(devices::Cmos::new(mem_below_4g, mem_above_4g))),
1004 0x70,
1005 0x2,
1006 )
1007 .unwrap();
1008
1009 let nul_device = Arc::new(Mutex::new(NoDevice));
1010 let i8042 = Arc::new(Mutex::new(devices::I8042Device::new(
1011 exit_evt.try_clone().map_err(Error::CloneEvent)?,
1012 )));
1013
1014 if pit_uses_speaker_port {
1015 io_bus.insert(i8042, 0x062, 0x3).unwrap();
1016 } else {
1017 io_bus.insert(i8042, 0x061, 0x4).unwrap();
1018 }
1019
1020 io_bus.insert(nul_device.clone(), 0x0ed, 0x1).unwrap(); // most likely this one does nothing
1021 io_bus.insert(nul_device.clone(), 0x0f0, 0x2).unwrap(); // ignore fpu
1022
1023 if let Some(pci_root) = pci {
1024 io_bus.insert(pci_root, 0xcf8, 0x8).unwrap();
1025 } else {
1026 // ignore pci.
1027 io_bus.insert(nul_device, 0xcf8, 0x8).unwrap();
1028 }
1029
1030 Ok(io_bus)
1031 }
1032
1033 /// Sets up the acpi devices for this platform and
1034 /// return the resources which is used to set the ACPI tables.
1035 ///
1036 /// # Arguments
1037 ///
1038 /// * - `io_bus` the I/O bus to add the devices to
1039 /// * - `resources` the SystemAllocator to allocate IO and MMIO for acpi
1040 /// devices.
1041 /// * - `suspend_evt` the event object which used to suspend the vm
1042 /// * - `sdts` ACPI system description tables
1043 /// * - `irq_chip` the IrqChip object for registering irq events
1044 /// * - `battery` indicate whether to create the battery
1045 /// * - `mmio_bus` the MMIO bus to add the devices to
setup_acpi_devices( io_bus: &mut devices::Bus, resources: &mut SystemAllocator, suspend_evt: Event, exit_evt: Event, sdts: Vec<SDT>, irq_chip: &mut impl IrqChip, battery: (&Option<BatteryType>, Option<Minijail>), mmio_bus: &mut devices::Bus, ) -> Result<(acpi::ACPIDevResource, Option<BatControl>)>1046 fn setup_acpi_devices(
1047 io_bus: &mut devices::Bus,
1048 resources: &mut SystemAllocator,
1049 suspend_evt: Event,
1050 exit_evt: Event,
1051 sdts: Vec<SDT>,
1052 irq_chip: &mut impl IrqChip,
1053 battery: (&Option<BatteryType>, Option<Minijail>),
1054 mmio_bus: &mut devices::Bus,
1055 ) -> Result<(acpi::ACPIDevResource, Option<BatControl>)> {
1056 // The AML data for the acpi devices
1057 let mut amls = Vec::new();
1058
1059 let pm_alloc = resources.get_anon_alloc();
1060 let pm_iobase = match resources.io_allocator() {
1061 Some(io) => io
1062 .allocate_with_align(
1063 devices::acpi::ACPIPM_RESOURCE_LEN as u64,
1064 pm_alloc,
1065 "ACPIPM".to_string(),
1066 devices::acpi::ACPIPM_RESOURCE_LEN as u64,
1067 )
1068 .map_err(Error::AllocateIOResouce)?,
1069 None => 0x600,
1070 };
1071
1072 let pmresource = devices::ACPIPMResource::new(suspend_evt, exit_evt);
1073 Aml::to_aml_bytes(&pmresource, &mut amls);
1074 let pm = Arc::new(Mutex::new(pmresource));
1075 io_bus
1076 .insert(
1077 pm.clone(),
1078 pm_iobase as u64,
1079 devices::acpi::ACPIPM_RESOURCE_LEN as u64,
1080 )
1081 .unwrap();
1082 io_bus.notify_on_resume(pm);
1083
1084 let bat_control = if let Some(battery_type) = battery.0 {
1085 match battery_type {
1086 BatteryType::Goldfish => {
1087 let control_tube = arch::add_goldfish_battery(
1088 &mut amls,
1089 battery.1,
1090 mmio_bus,
1091 irq_chip,
1092 X86_64_SCI_IRQ,
1093 resources,
1094 )
1095 .map_err(Error::CreateBatDevices)?;
1096 Some(BatControl {
1097 type_: BatteryType::Goldfish,
1098 control_tube,
1099 })
1100 }
1101 }
1102 } else {
1103 None
1104 };
1105
1106 Ok((
1107 acpi::ACPIDevResource {
1108 amls,
1109 pm_iobase,
1110 sdts,
1111 },
1112 bat_control,
1113 ))
1114 }
1115
1116 /// Sets up the serial devices for this platform. Returns the serial port number and serial
1117 /// device to be used for stdout
1118 ///
1119 /// # Arguments
1120 ///
1121 /// * - `irq_chip` the IrqChip object for registering irq events
1122 /// * - `io_bus` the I/O bus to add the devices to
1123 /// * - `serial_parmaters` - definitions for how the serial devices should be configured
setup_serial_devices( protected_vm: ProtectionType, irq_chip: &mut impl IrqChip, io_bus: &mut devices::Bus, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, ) -> Result<()>1124 fn setup_serial_devices(
1125 protected_vm: ProtectionType,
1126 irq_chip: &mut impl IrqChip,
1127 io_bus: &mut devices::Bus,
1128 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
1129 serial_jail: Option<Minijail>,
1130 ) -> Result<()> {
1131 let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?;
1132 let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?;
1133
1134 arch::add_serial_devices(
1135 protected_vm,
1136 io_bus,
1137 &com_evt_1_3,
1138 &com_evt_2_4,
1139 &serial_parameters,
1140 serial_jail,
1141 )
1142 .map_err(Error::CreateSerialDevices)?;
1143
1144 irq_chip
1145 .register_irq_event(X86_64_SERIAL_1_3_IRQ, &com_evt_1_3, None)
1146 .map_err(Error::RegisterIrqfd)?;
1147 irq_chip
1148 .register_irq_event(X86_64_SERIAL_2_4_IRQ, &com_evt_2_4, None)
1149 .map_err(Error::RegisterIrqfd)?;
1150
1151 Ok(())
1152 }
1153 }
1154
1155 #[cfg(test)]
1156 mod test_integration;
1157
#[cfg(test)]
mod tests {
    use super::*;

    // One mebibyte, in bytes.
    const MIB: u64 = 1 << 20;
    // The 4 GiB boundary.
    const FOUR_GIB: u64 = 1 << 32;
    // A RAM size for which a single RAM region is expected.
    const RAM_LT_4GB: u64 = 1 << 29;
    // A RAM size for which a second RAM region starting at 4 GiB is expected.
    const RAM_GT_4GB: u64 = FOUR_GIB + 0x8000;
    // 3328 MiB, which is exactly 4 GiB minus the size of the gap (768 MiB).
    const RAM_EQ_4GB: u64 = 3328 * MIB;
    // BIOS size used by the `*_bios` tests.
    const BIOS_LEN: u64 = MIB;

    #[test]
    fn regions_lt_4gb_nobios() {
        let regions = arch_memory_regions(RAM_LT_4GB, /* bios_size */ None);
        assert_eq!(1, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(RAM_LT_4GB, regions[0].1);
    }

    #[test]
    fn regions_gt_4gb_nobios() {
        let regions = arch_memory_regions(RAM_GT_4GB, /* bios_size */ None);
        assert_eq!(2, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(FOUR_GIB), regions[1].0);
    }

    #[test]
    fn regions_lt_4gb_bios() {
        let regions = arch_memory_regions(RAM_LT_4GB, Some(BIOS_LEN));
        assert_eq!(2, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(RAM_LT_4GB, regions[0].1);
        // The BIOS region ends right at the 4 GiB boundary.
        let bios_addr = GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_LEN);
        assert_eq!(bios_addr, regions[1].0);
        assert_eq!(BIOS_LEN, regions[1].1);
    }

    #[test]
    fn regions_gt_4gb_bios() {
        let regions = arch_memory_regions(RAM_GT_4GB, Some(BIOS_LEN));
        assert_eq!(3, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        // The BIOS region sits between the low and the high RAM regions.
        let bios_addr = GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_LEN);
        assert_eq!(bios_addr, regions[1].0);
        assert_eq!(BIOS_LEN, regions[1].1);
        assert_eq!(GuestAddress(FOUR_GIB), regions[2].0);
    }

    #[test]
    fn regions_eq_4gb_nobios() {
        // Test with size = 3328 MiB, which is exactly 4 GiB minus the size of the gap (768 MiB).
        let regions = arch_memory_regions(RAM_EQ_4GB, /* bios_size */ None);
        assert_eq!(1, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(RAM_EQ_4GB, regions[0].1);
    }

    #[test]
    fn regions_eq_4gb_bios() {
        // Test with size = 3328 MiB, which is exactly 4 GiB minus the size of the gap (768 MiB).
        let regions = arch_memory_regions(RAM_EQ_4GB, Some(BIOS_LEN));
        assert_eq!(2, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(RAM_EQ_4GB, regions[0].1);
        let bios_addr = GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_LEN);
        assert_eq!(bios_addr, regions[1].0);
        assert_eq!(BIOS_LEN, regions[1].1);
    }
}
1230