1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! ARM 64-bit architecture support.
6
7 #![cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8
9 use std::collections::BTreeMap;
10 use std::fs::File;
11 use std::io;
12 use std::path::PathBuf;
13 use std::sync::atomic::AtomicU32;
14 use std::sync::mpsc;
15 use std::sync::Arc;
16
17 use arch::get_serial_cmdline;
18 use arch::CpuSet;
19 use arch::DtbOverlay;
20 use arch::FdtPosition;
21 use arch::GetSerialCmdlineError;
22 use arch::MemoryRegionConfig;
23 use arch::RunnableLinuxVm;
24 use arch::SveConfig;
25 use arch::VcpuAffinity;
26 use arch::VmComponents;
27 use arch::VmImage;
28 use base::MemoryMappingBuilder;
29 use base::SendTube;
30 use base::Tube;
31 use devices::serial_device::SerialHardware;
32 use devices::serial_device::SerialParameters;
33 use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
34 use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
35 use devices::Bus;
36 use devices::BusDeviceObj;
37 use devices::BusError;
38 use devices::BusType;
39 use devices::IrqChip;
40 use devices::IrqChipAArch64;
41 use devices::IrqEventSource;
42 use devices::PciAddress;
43 use devices::PciConfigMmio;
44 use devices::PciDevice;
45 use devices::PciRootCommand;
46 use devices::Serial;
47 #[cfg(any(target_os = "android", target_os = "linux"))]
48 use devices::VirtCpufreq;
49 #[cfg(any(target_os = "android", target_os = "linux"))]
50 use devices::VirtCpufreqV2;
51 #[cfg(feature = "gdb")]
52 use gdbstub::arch::Arch;
53 #[cfg(feature = "gdb")]
54 use gdbstub_arch::aarch64::reg::id::AArch64RegId;
55 #[cfg(feature = "gdb")]
56 use gdbstub_arch::aarch64::AArch64 as GdbArch;
57 #[cfg(feature = "gdb")]
58 use hypervisor::AArch64SysRegId;
59 use hypervisor::CpuConfigAArch64;
60 use hypervisor::DeviceKind;
61 use hypervisor::Hypervisor;
62 use hypervisor::HypervisorCap;
63 use hypervisor::MemCacheType;
64 use hypervisor::ProtectionType;
65 use hypervisor::VcpuAArch64;
66 use hypervisor::VcpuFeature;
67 use hypervisor::VcpuInitAArch64;
68 use hypervisor::VcpuRegAArch64;
69 use hypervisor::Vm;
70 use hypervisor::VmAArch64;
71 use hypervisor::VmCap;
72 #[cfg(windows)]
73 use jail::FakeMinijailStub as Minijail;
74 use kernel_loader::LoadedKernel;
75 #[cfg(any(target_os = "android", target_os = "linux"))]
76 use minijail::Minijail;
77 use remain::sorted;
78 use resources::address_allocator::AddressAllocator;
79 use resources::AddressRange;
80 use resources::MmioType;
81 use resources::SystemAllocator;
82 use resources::SystemAllocatorConfig;
83 use sync::Condvar;
84 use sync::Mutex;
85 use thiserror::Error;
86 use vm_control::BatControl;
87 use vm_control::BatteryType;
88 use vm_memory::GuestAddress;
89 use vm_memory::GuestMemory;
90 use vm_memory::GuestMemoryError;
91 use vm_memory::MemoryRegionOptions;
92 use vm_memory::MemoryRegionPurpose;
93
94 mod fdt;
95
// Maximum size and required alignment of the flattened device tree blob.
const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
const AARCH64_FDT_ALIGN: u64 = 0x200000;
// Required alignment for the initrd load address.
const AARCH64_INITRD_ALIGN: u64 = 0x1000000;

// Maximum Linux arm64 kernel command line size (arch/arm64/include/uapi/asm/setup.h).
const AARCH64_CMDLINE_MAX_SIZE: usize = 2048;

// These constants indicate the address space used by the ARM vGIC.
const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;

// This indicates the start of DRAM inside the physical address space.
const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
// Size of the MMIO window reserved for platform (non-PCI) devices.
const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x800000;

// The protected VM firmware region sits immediately below the start of DRAM.
const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
const AARCH64_PROTECTED_VM_FW_START: u64 =
    AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;

// Paravirtualized time (pvtime) region: one AARCH64_PVTIME_SIZE-byte record
// per vCPU, allocated from a dedicated IPA region below DRAM.
const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
const AARCH64_PVTIME_IPA_START: u64 = 0x1ff0000;
const AARCH64_PVTIME_SIZE: u64 = 64;

// These constants indicate the placement of the GIC registers in the physical
// address space: the distributor ends just below 1 GiB, with the CPU interface
// directly beneath it.
const AARCH64_GIC_DIST_BASE: u64 = 0x40000000 - AARCH64_GIC_DIST_SIZE;
const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
// Size of one GIC redistributor register frame.
const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;

// PSR (Processor State Register) bits, used to construct the initial PSTATE:
// EL1h mode with the F (FIQ), I (IRQ), A (SError) and D (Debug) exception
// mask bits set.
const PSR_MODE_EL1H: u64 = 0x00000005;
const PSR_F_BIT: u64 = 0x00000040;
const PSR_I_BIT: u64 = 0x00000080;
const PSR_A_BIT: u64 = 0x00000100;
const PSR_D_BIT: u64 = 0x00000200;

// This was the speed kvmtool used, not sure if it matters.
const AARCH64_SERIAL_SPEED: u32 = 1843200;
// The serial device gets the first interrupt line
// Which gets mapped to the first SPI interrupt (physical 32).
const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
const AARCH64_SERIAL_2_4_IRQ: u32 = 2;

// Place the RTC device at page 2
const AARCH64_RTC_ADDR: u64 = 0x2000;
// The RTC device gets one 4k page
const AARCH64_RTC_SIZE: u64 = 0x1000;
// The RTC device gets the second interrupt line
const AARCH64_RTC_IRQ: u32 = 1;

// The Goldfish battery device gets the 3rd interrupt line
const AARCH64_BAT_IRQ: u32 = 3;

// Place the virtual watchdog device at page 3
const AARCH64_VMWDT_ADDR: u64 = 0x3000;
// The virtual watchdog device gets one 4k page
const AARCH64_VMWDT_SIZE: u64 = 0x1000;

// Default PCI MMIO configuration region base address.
const AARCH64_PCI_CAM_BASE_DEFAULT: u64 = 0x10000;
// Default PCI MMIO configuration region size.
const AARCH64_PCI_CAM_SIZE_DEFAULT: u64 = 0x1000000;
// Default PCI mem base address.
const AARCH64_PCI_MEM_BASE_DEFAULT: u64 = 0x2000000;
// Default PCI mem size.
const AARCH64_PCI_MEM_SIZE_DEFAULT: u64 = 0x2000000;
// Virtio devices start at SPI interrupt number 4
const AARCH64_IRQ_BASE: u32 = 4;

// Virtual CPU Frequency Device: one per-vCPU register block starting at
// AARCH64_VIRTFREQ_BASE; the blocks together must fit in
// AARCH64_VIRTFREQ_MAXSIZE.
const AARCH64_VIRTFREQ_BASE: u64 = 0x1040000;
const AARCH64_VIRTFREQ_SIZE: u64 = 0x8;
const AARCH64_VIRTFREQ_MAXSIZE: u64 = 0x10000;
const AARCH64_VIRTFREQ_V2_SIZE: u64 = 0x1000;

// PMU PPI interrupt, same as qemu
const AARCH64_PMU_IRQ: u32 = 7;

// VCPU stall detector interrupt
const AARCH64_VMWDT_IRQ: u32 = 15;
176
/// The guest payload that was loaded into memory: either a BIOS image or a
/// Linux kernel.
enum PayloadType {
    Bios {
        // Guest address the BIOS image was loaded at (also its entry point).
        entry: GuestAddress,
        // Size of the loaded BIOS image in bytes.
        image_size: u64,
    },
    Kernel(LoadedKernel),
}
184
185 impl PayloadType {
entry(&self) -> GuestAddress186 fn entry(&self) -> GuestAddress {
187 match self {
188 Self::Bios {
189 entry,
190 image_size: _,
191 } => *entry,
192 Self::Kernel(k) => k.entry,
193 }
194 }
195
size(&self) -> u64196 fn size(&self) -> u64 {
197 match self {
198 Self::Bios {
199 entry: _,
200 image_size,
201 } => *image_size,
202 Self::Kernel(k) => k.size,
203 }
204 }
205
address_range(&self) -> AddressRange206 fn address_range(&self) -> AddressRange {
207 match self {
208 Self::Bios { entry, image_size } => {
209 AddressRange::from_start_and_size(entry.offset(), *image_size)
210 .expect("invalid BIOS address range")
211 }
212 Self::Kernel(k) => {
213 // TODO: b/389759119: use `k.address_range` to include regions that are present in
214 // memory but not in the original image file (e.g. `.bss` section).
215 AddressRange::from_start_and_size(k.entry.offset(), k.size)
216 .expect("invalid kernel address range")
217 }
218 }
219 }
220 }
221
222 // When static swiotlb allocation is required, returns the address it should be allocated at.
223 // Otherwise, returns None.
get_swiotlb_addr( memory_size: u64, swiotlb_size: u64, hypervisor: &(impl Hypervisor + ?Sized), ) -> Option<GuestAddress>224 fn get_swiotlb_addr(
225 memory_size: u64,
226 swiotlb_size: u64,
227 hypervisor: &(impl Hypervisor + ?Sized),
228 ) -> Option<GuestAddress> {
229 if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
230 Some(GuestAddress(
231 AARCH64_PHYS_MEM_START + memory_size - swiotlb_size,
232 ))
233 } else {
234 None
235 }
236 }
237
/// Errors that can occur while building or running an AArch64 VM.
///
/// `#[sorted]` enforces that variants stay in alphabetical order.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    #[error("failed to allocate IRQ number")]
    AllocateIrq,
    #[error("bios could not be loaded: {0}")]
    BiosLoadFailure(arch::LoadImageError),
    #[error("failed to build arm pvtime memory: {0}")]
    BuildPvtimeError(base::MmapError),
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("bad PCI CAM configuration: {0}")]
    ConfigurePciCam(String),
    #[error("bad PCI mem configuration: {0}")]
    ConfigurePciMem(String),
    #[error("failed to configure CPU Frequencies: {0}")]
    CpuFrequencies(base::Error),
    #[error("failed to configure CPU topology: {0}")]
    CpuTopology(base::Error),
    #[error("unable to create battery devices: {0}")]
    CreateBatDevices(arch::DeviceRegistrationError),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create GIC: {0}")]
    CreateGICFailure(base::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create tube: {0}")]
    CreateTube(base::TubeError),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("unable to create vm watchdog timer device: {0}")]
    CreateVmwdtDevice(anyhow::Error),
    #[error("custom pVM firmware could not be loaded: {0}")]
    CustomPvmFwLoadFailure(arch::LoadImageError),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("failed to enable singlestep execution: {0}")]
    EnableSinglestep(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get HW breakpoint count: {0}")]
    GetMaxHwBreakPoint(base::Error),
    #[error("failed to get PSCI version: {0}")]
    GetPsciVersion(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("failed to initialize arm pvtime: {0}")]
    InitPvtimeError(base::Error),
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("failed to initialize virtual machine {0}")]
    InitVmError(base::Error),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(kernel_loader::Error),
    #[error("error loading Kernel from Elf image: {0}")]
    LoadElfKernel(kernel_loader::Error),
    #[error("failed to map arm pvtime memory: {0}")]
    MapPvtimeError(base::Error),
    #[error("pVM firmware could not be loaded: {0}")]
    PvmFwLoadFailure(base::Error),
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("error reading guest memory: {0}")]
    ReadGuestMemory(vm_memory::GuestMemoryError),
    #[error("error reading CPU register: {0}")]
    ReadReg(base::Error),
    #[error("error reading CPU registers: {0}")]
    ReadRegs(base::Error),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual cpufreq device: {0}")]
    RegisterVirtCpufreq(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set a hardware breakpoint: {0}")]
    SetHwBreakpoint(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("failed to set up guest memory: {0}")]
    SetupGuestMemory(GuestMemoryError),
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
    #[error("error writing guest memory: {0}")]
    WriteGuestMemory(GuestMemoryError),
    #[error("error writing CPU register: {0}")]
    WriteReg(base::Error),
    #[error("error writing CPU registers: {0}")]
    WriteRegs(base::Error),
}

/// Result alias for this module's [`Error`] type.
pub type Result<T> = std::result::Result<T, Error>;
348
load_kernel( guest_mem: &GuestMemory, kernel_start: GuestAddress, mut kernel_image: &mut File, ) -> Result<LoadedKernel>349 fn load_kernel(
350 guest_mem: &GuestMemory,
351 kernel_start: GuestAddress,
352 mut kernel_image: &mut File,
353 ) -> Result<LoadedKernel> {
354 if let Ok(elf_kernel) = kernel_loader::load_elf(
355 guest_mem,
356 kernel_start,
357 &mut kernel_image,
358 AARCH64_PHYS_MEM_START,
359 ) {
360 return Ok(elf_kernel);
361 }
362
363 if let Ok(lz4_kernel) =
364 kernel_loader::load_arm64_kernel_lz4(guest_mem, kernel_start, &mut kernel_image)
365 {
366 return Ok(lz4_kernel);
367 }
368
369 kernel_loader::load_arm64_kernel(guest_mem, kernel_start, kernel_image)
370 .map_err(Error::KernelLoadFailure)
371 }
372
/// Marker type carrying the `arch::LinuxArch` implementation for 64-bit ARM.
pub struct AArch64;
374
get_block_size() -> u64375 fn get_block_size() -> u64 {
376 let page_size = base::pagesize();
377 // Each PTE entry being 8 bytes long, we can fit in one page (page_size / 8)
378 // entries.
379 let ptes_per_page = page_size / 8;
380 let block_size = page_size * ptes_per_page;
381
382 block_size as u64
383 }
384
get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64>385 fn get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64> {
386 const MPIDR_AFF_MASK: u64 = 0xff_00ff_ffff;
387
388 Some(vcpus.get(index)?.get_mpidr().ok()? & MPIDR_AFF_MASK)
389 }
390
main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64391 fn main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64 {
392 // Static swiotlb is allocated from the end of RAM as a separate memory region, so, if
393 // enabled, make the RAM memory region smaller to leave room for it.
394 let mut main_memory_size = components.memory_size;
395 if let Some(size) = components.swiotlb {
396 if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
397 main_memory_size -= size;
398 }
399 }
400 main_memory_size
401 }
402
/// Fixed address-layout decisions derived from the VM configuration.
pub struct ArchMemoryLayout {
    // PCI configuration access (CAM) region; always placed below the start of RAM.
    pci_cam: AddressRange,
    // Low MMIO window used for PCI device memory.
    pci_mem: AddressRange,
}
407
408 impl arch::LinuxArch for AArch64 {
409 type Error = Error;
410 type ArchMemoryLayout = ArchMemoryLayout;
411
arch_memory_layout( components: &VmComponents, ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error>412 fn arch_memory_layout(
413 components: &VmComponents,
414 ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
415 let (pci_cam_start, pci_cam_size) = match components.pci_config.cam {
416 Some(MemoryRegionConfig { start, size }) => {
417 (start, size.unwrap_or(AARCH64_PCI_CAM_SIZE_DEFAULT))
418 }
419 None => (AARCH64_PCI_CAM_BASE_DEFAULT, AARCH64_PCI_CAM_SIZE_DEFAULT),
420 };
421 // TODO: Make the PCI slot allocator aware of the CAM size so we can remove this check.
422 if pci_cam_size != AARCH64_PCI_CAM_SIZE_DEFAULT {
423 return Err(Error::ConfigurePciCam(format!(
424 "PCI CAM size must be {AARCH64_PCI_CAM_SIZE_DEFAULT:#x}, got {pci_cam_size:#x}"
425 )));
426 }
427 let pci_cam = AddressRange::from_start_and_size(pci_cam_start, pci_cam_size).ok_or(
428 Error::ConfigurePciCam("PCI CAM region overflowed".to_string()),
429 )?;
430 if pci_cam.end >= AARCH64_PHYS_MEM_START {
431 return Err(Error::ConfigurePciCam(format!(
432 "PCI CAM ({pci_cam:?}) must be before start of RAM ({AARCH64_PHYS_MEM_START:#x})"
433 )));
434 }
435
436 let pci_mem = match components.pci_config.mem {
437 Some(MemoryRegionConfig { start, size }) => AddressRange::from_start_and_size(
438 start,
439 size.unwrap_or(AARCH64_PCI_MEM_SIZE_DEFAULT),
440 )
441 .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
442 None => AddressRange::from_start_and_size(
443 AARCH64_PCI_MEM_BASE_DEFAULT,
444 AARCH64_PCI_MEM_SIZE_DEFAULT,
445 )
446 .unwrap(),
447 };
448
449 Ok(ArchMemoryLayout { pci_cam, pci_mem })
450 }
451
452 /// Returns a Vec of the valid memory addresses.
453 /// These should be used to configure the GuestMemory structure for the platform.
guest_memory_layout( components: &VmComponents, _arch_memory_layout: &Self::ArchMemoryLayout, hypervisor: &impl Hypervisor, ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>454 fn guest_memory_layout(
455 components: &VmComponents,
456 _arch_memory_layout: &Self::ArchMemoryLayout,
457 hypervisor: &impl Hypervisor,
458 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
459 let main_memory_size = main_memory_size(components, hypervisor);
460
461 let mut memory_regions = vec![(
462 GuestAddress(AARCH64_PHYS_MEM_START),
463 main_memory_size,
464 MemoryRegionOptions::new().align(get_block_size()),
465 )];
466
467 // Allocate memory for the pVM firmware.
468 if components.hv_cfg.protection_type.runs_firmware() {
469 memory_regions.push((
470 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
471 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
472 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
473 ));
474 }
475
476 if let Some(size) = components.swiotlb {
477 if let Some(addr) = get_swiotlb_addr(components.memory_size, size, hypervisor) {
478 memory_regions.push((
479 addr,
480 size,
481 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
482 ));
483 }
484 }
485
486 Ok(memory_regions)
487 }
488
get_system_allocator_config<V: Vm>( vm: &V, arch_memory_layout: &Self::ArchMemoryLayout, ) -> SystemAllocatorConfig489 fn get_system_allocator_config<V: Vm>(
490 vm: &V,
491 arch_memory_layout: &Self::ArchMemoryLayout,
492 ) -> SystemAllocatorConfig {
493 let guest_phys_end = 1u64 << vm.get_guest_phys_addr_bits();
494 // The platform MMIO region is immediately past the end of RAM.
495 let plat_mmio_base = vm.get_memory().end_addr().offset();
496 let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
497 // The high MMIO region is the rest of the address space after the platform MMIO region.
498 let high_mmio_base = plat_mmio_base + plat_mmio_size;
499 let high_mmio_size = guest_phys_end
500 .checked_sub(high_mmio_base)
501 .unwrap_or_else(|| {
502 panic!(
503 "guest_phys_end {:#x} < high_mmio_base {:#x}",
504 guest_phys_end, high_mmio_base,
505 );
506 });
507 SystemAllocatorConfig {
508 io: None,
509 low_mmio: arch_memory_layout.pci_mem,
510 high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
511 .expect("invalid high mmio region"),
512 platform_mmio: Some(
513 AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
514 .expect("invalid platform mmio region"),
515 ),
516 first_irq: AARCH64_IRQ_BASE,
517 }
518 }
519
build_vm<V, Vcpu>( mut components: VmComponents, arch_memory_layout: &Self::ArchMemoryLayout, _vm_evt_wrtube: &SendTube, system_allocator: &mut SystemAllocator, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>), mut vm: V, ramoops_region: Option<arch::pstore::RamoopsRegion>, devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>, irq_chip: &mut dyn IrqChipAArch64, vcpu_ids: &mut Vec<usize>, dump_device_tree_blob: Option<PathBuf>, _debugcon_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>, device_tree_overlays: Vec<DtbOverlay>, fdt_position: Option<FdtPosition>, no_pmu: bool, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error> where V: VmAArch64, Vcpu: VcpuAArch64,520 fn build_vm<V, Vcpu>(
521 mut components: VmComponents,
522 arch_memory_layout: &Self::ArchMemoryLayout,
523 _vm_evt_wrtube: &SendTube,
524 system_allocator: &mut SystemAllocator,
525 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
526 serial_jail: Option<Minijail>,
527 (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
528 mut vm: V,
529 ramoops_region: Option<arch::pstore::RamoopsRegion>,
530 devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
531 irq_chip: &mut dyn IrqChipAArch64,
532 vcpu_ids: &mut Vec<usize>,
533 dump_device_tree_blob: Option<PathBuf>,
534 _debugcon_jail: Option<Minijail>,
535 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
536 _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
537 device_tree_overlays: Vec<DtbOverlay>,
538 fdt_position: Option<FdtPosition>,
539 no_pmu: bool,
540 ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
541 where
542 V: VmAArch64,
543 Vcpu: VcpuAArch64,
544 {
545 let has_bios = matches!(components.vm_image, VmImage::Bios(_));
546 let mem = vm.get_memory().clone();
547
548 let main_memory_size = main_memory_size(&components, vm.get_hypervisor());
549
550 let fdt_position = fdt_position.unwrap_or(if has_bios {
551 FdtPosition::Start
552 } else {
553 FdtPosition::End
554 });
555 let payload_address = match fdt_position {
556 // If FDT is at the start RAM, the payload needs to go somewhere after it.
557 FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_MAX_SIZE),
558 // Otherwise, put the payload at the start of RAM.
559 FdtPosition::End | FdtPosition::AfterPayload => GuestAddress(AARCH64_PHYS_MEM_START),
560 };
561
562 // separate out image loading from other setup to get a specific error for
563 // image loading
564 let mut initrd = None;
565 let (payload, payload_end_address) = match components.vm_image {
566 VmImage::Bios(ref mut bios) => {
567 let image_size = arch::load_image(&mem, bios, payload_address, u64::MAX)
568 .map_err(Error::BiosLoadFailure)?;
569 (
570 PayloadType::Bios {
571 entry: payload_address,
572 image_size: image_size as u64,
573 },
574 payload_address
575 .checked_add(image_size.try_into().unwrap())
576 .and_then(|end| end.checked_sub(1))
577 .unwrap(),
578 )
579 }
580 VmImage::Kernel(ref mut kernel_image) => {
581 let loaded_kernel = load_kernel(&mem, payload_address, kernel_image)?;
582 let kernel_end = loaded_kernel.address_range.end;
583 let mut payload_end = GuestAddress(kernel_end);
584 initrd = match components.initrd_image {
585 Some(initrd_file) => {
586 let mut initrd_file = initrd_file;
587 let initrd_addr = (kernel_end + 1 + (AARCH64_INITRD_ALIGN - 1))
588 & !(AARCH64_INITRD_ALIGN - 1);
589 let initrd_max_size =
590 main_memory_size.saturating_sub(initrd_addr - AARCH64_PHYS_MEM_START);
591 let initrd_addr = GuestAddress(initrd_addr);
592 let initrd_size =
593 arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
594 .map_err(Error::InitrdLoadFailure)?;
595 payload_end = initrd_addr
596 .checked_add(initrd_size.try_into().unwrap())
597 .and_then(|end| end.checked_sub(1))
598 .unwrap();
599 Some((initrd_addr, initrd_size))
600 }
601 None => None,
602 };
603 (PayloadType::Kernel(loaded_kernel), payload_end)
604 }
605 };
606
607 let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + main_memory_size);
608
609 let fdt_address = match fdt_position {
610 FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START),
611 FdtPosition::End => {
612 let addr = memory_end
613 .checked_sub(AARCH64_FDT_MAX_SIZE)
614 .expect("Not enough memory for FDT")
615 .align_down(AARCH64_FDT_ALIGN);
616 assert!(addr > payload_end_address, "Not enough memory for FDT");
617 addr
618 }
619 FdtPosition::AfterPayload => payload_end_address
620 .checked_add(1)
621 .and_then(|addr| addr.align(AARCH64_FDT_ALIGN))
622 .expect("Not enough memory for FDT"),
623 };
624
625 let mut use_pmu = vm
626 .get_hypervisor()
627 .check_capability(HypervisorCap::ArmPmuV3);
628 use_pmu &= !no_pmu;
629 let vcpu_count = components.vcpu_count;
630 let mut has_pvtime = true;
631 let mut vcpus = Vec::with_capacity(vcpu_count);
632 let mut vcpu_init = Vec::with_capacity(vcpu_count);
633 for vcpu_id in 0..vcpu_count {
634 let vcpu: Vcpu = *vm
635 .create_vcpu(vcpu_id)
636 .map_err(Error::CreateVcpu)?
637 .downcast::<Vcpu>()
638 .map_err(|_| Error::DowncastVcpu)?;
639 let per_vcpu_init = if vm
640 .get_hypervisor()
641 .check_capability(HypervisorCap::HypervisorInitializedBootContext)
642 {
643 // No registers are initialized: VcpuInitAArch64.regs is an empty BTreeMap
644 Default::default()
645 } else {
646 Self::vcpu_init(
647 vcpu_id,
648 &payload,
649 fdt_address,
650 components.hv_cfg.protection_type,
651 components.boot_cpu,
652 )
653 };
654 has_pvtime &= vcpu.has_pvtime_support();
655 vcpus.push(vcpu);
656 vcpu_ids.push(vcpu_id);
657 vcpu_init.push(per_vcpu_init);
658 }
659
660 if components.sve_config.auto {
661 components.sve_config.enable = vm.check_capability(VmCap::Sve);
662 }
663
664 // Initialize Vcpus after all Vcpu objects have been created.
665 for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
666 let features =
667 &Self::vcpu_features(vcpu_id, use_pmu, components.boot_cpu, components.sve_config);
668 vcpu.init(features).map_err(Error::VcpuInit)?;
669 }
670
671 irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
672
673 if has_pvtime {
674 let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
675 .build()
676 .map_err(Error::BuildPvtimeError)?;
677 vm.add_memory_region(
678 GuestAddress(AARCH64_PVTIME_IPA_START),
679 Box::new(pvtime_mem),
680 false,
681 false,
682 MemCacheType::CacheCoherent,
683 )
684 .map_err(Error::MapPvtimeError)?;
685 }
686
687 if components.hv_cfg.protection_type.needs_firmware_loaded() {
688 arch::load_image(
689 &mem,
690 &mut components
691 .pvm_fw
692 .expect("pvmfw must be available if ProtectionType loads it"),
693 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
694 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
695 )
696 .map_err(Error::CustomPvmFwLoadFailure)?;
697 } else if components.hv_cfg.protection_type.runs_firmware() {
698 // Tell the hypervisor to load the pVM firmware.
699 vm.load_protected_vm_firmware(
700 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
701 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
702 )
703 .map_err(Error::PvmFwLoadFailure)?;
704 }
705
706 for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
707 use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
708 if has_pvtime {
709 vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
710 .map_err(Error::InitPvtimeError)?;
711 }
712 }
713
714 let mmio_bus = Arc::new(devices::Bus::new(BusType::Mmio));
715
716 // ARM doesn't really use the io bus like x86, so just create an empty bus.
717 let io_bus = Arc::new(devices::Bus::new(BusType::Io));
718
719 // Event used by PMDevice to notify crosvm that
720 // guest OS is trying to suspend.
721 let (suspend_tube_send, suspend_tube_recv) =
722 Tube::directional_pair().map_err(Error::CreateTube)?;
723 let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));
724
725 let (pci_devices, others): (Vec<_>, Vec<_>) = devs
726 .into_iter()
727 .partition(|(dev, _)| dev.as_pci_device().is_some());
728
729 let pci_devices = pci_devices
730 .into_iter()
731 .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
732 .collect();
733 let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
734 arch::generate_pci_root(
735 pci_devices,
736 irq_chip.as_irq_chip_mut(),
737 mmio_bus.clone(),
738 GuestAddress(arch_memory_layout.pci_cam.start),
739 8,
740 io_bus.clone(),
741 system_allocator,
742 &mut vm,
743 (devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
744 None,
745 #[cfg(feature = "swap")]
746 swap_controller,
747 )
748 .map_err(Error::CreatePciRoot)?;
749
750 let pci_root = Arc::new(Mutex::new(pci));
751 let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
752 let (platform_devices, _others): (Vec<_>, Vec<_>) = others
753 .into_iter()
754 .partition(|(dev, _)| dev.as_platform_device().is_some());
755
756 let platform_devices = platform_devices
757 .into_iter()
758 .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
759 .collect();
760 let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
761 arch::sys::linux::generate_platform_bus(
762 platform_devices,
763 irq_chip.as_irq_chip_mut(),
764 &mmio_bus,
765 system_allocator,
766 &mut vm,
767 #[cfg(feature = "swap")]
768 swap_controller,
769 components.hv_cfg.protection_type,
770 )
771 .map_err(Error::CreatePlatformBus)?;
772 pid_debug_label_map.append(&mut platform_pid_debug_label_map);
773
774 let (vmwdt_host_tube, vmwdt_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
775 Self::add_arch_devs(
776 irq_chip.as_irq_chip_mut(),
777 &mmio_bus,
778 vcpu_count,
779 _vm_evt_wrtube,
780 vmwdt_control_tube,
781 )?;
782
783 let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
784 let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
785 let serial_devices = arch::add_serial_devices(
786 components.hv_cfg.protection_type,
787 &mmio_bus,
788 (AARCH64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
789 (AARCH64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
790 serial_parameters,
791 serial_jail,
792 #[cfg(feature = "swap")]
793 swap_controller,
794 )
795 .map_err(Error::CreateSerialDevices)?;
796
797 let source = IrqEventSource {
798 device_id: Serial::device_id(),
799 queue_id: 0,
800 device_name: Serial::debug_label(),
801 };
802 irq_chip
803 .register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
804 .map_err(Error::RegisterIrqfd)?;
805 irq_chip
806 .register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
807 .map_err(Error::RegisterIrqfd)?;
808
809 mmio_bus
810 .insert(
811 pci_bus,
812 arch_memory_layout.pci_cam.start,
813 arch_memory_layout.pci_cam.len().unwrap(),
814 )
815 .map_err(Error::RegisterPci)?;
816
817 let (vcpufreq_host_tube, vcpufreq_control_tube) =
818 Tube::pair().map_err(Error::CreateTube)?;
819 let vcpufreq_shared_tube = Arc::new(Mutex::new(vcpufreq_control_tube));
820 #[cfg(any(target_os = "android", target_os = "linux"))]
821 if !components.cpu_frequencies.is_empty() {
822 let mut freq_domain_vcpus: BTreeMap<u32, Vec<usize>> = BTreeMap::new();
823 let mut freq_domain_perfs: BTreeMap<u32, Arc<AtomicU32>> = BTreeMap::new();
824 let mut vcpu_affinities: Vec<u32> = Vec::new();
825 for vcpu in 0..vcpu_count {
826 let freq_domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
827 freq_domain_vcpus.entry(freq_domain).or_default().push(vcpu);
828 let vcpu_affinity = match components.vcpu_affinity.clone() {
829 Some(VcpuAffinity::Global(v)) => v,
830 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(),
831 None => panic!("vcpu_affinity needs to be set for VirtCpufreq"),
832 };
833 vcpu_affinities.push(vcpu_affinity[0].try_into().unwrap());
834 }
835 for domain in freq_domain_vcpus.keys() {
836 let domain_perf = Arc::new(AtomicU32::new(0));
837 freq_domain_perfs.insert(*domain, domain_perf);
838 }
839 let largest_vcpu_affinity_idx = *vcpu_affinities.iter().max().unwrap() as usize;
840 for (vcpu, vcpu_affinity) in vcpu_affinities.iter().enumerate() {
841 let mut virtfreq_size = AARCH64_VIRTFREQ_SIZE;
842 if components.virt_cpufreq_v2 {
843 let domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
844 virtfreq_size = AARCH64_VIRTFREQ_V2_SIZE;
845 let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreqV2::new(
846 *vcpu_affinity,
847 components.cpu_frequencies.get(&vcpu).unwrap().clone(),
848 components.vcpu_domain_paths.get(&vcpu).cloned(),
849 domain,
850 *components.normalized_cpu_ipc_ratios.get(&vcpu).unwrap(),
851 largest_vcpu_affinity_idx,
852 vcpufreq_shared_tube.clone(),
853 freq_domain_vcpus.get(&domain).unwrap().clone(),
854 freq_domain_perfs.get(&domain).unwrap().clone(),
855 )));
856 mmio_bus
857 .insert(
858 virt_cpufreq,
859 AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
860 virtfreq_size,
861 )
862 .map_err(Error::RegisterVirtCpufreq)?;
863 } else {
864 let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new(
865 *vcpu_affinity,
866 *components.cpu_capacity.get(&vcpu).unwrap(),
867 *components
868 .cpu_frequencies
869 .get(&vcpu)
870 .unwrap()
871 .iter()
872 .max()
873 .unwrap(),
874 )));
875 mmio_bus
876 .insert(
877 virt_cpufreq,
878 AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
879 virtfreq_size,
880 )
881 .map_err(Error::RegisterVirtCpufreq)?;
882 }
883
884 if vcpu as u64 * AARCH64_VIRTFREQ_SIZE + virtfreq_size > AARCH64_VIRTFREQ_MAXSIZE {
885 panic!("Exceeded maximum number of virt cpufreq devices");
886 }
887 }
888 }
889
890 let mut cmdline = Self::get_base_linux_cmdline();
891 get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
892 .map_err(Error::GetSerialCmdline)?;
893 for param in components.extra_kernel_params {
894 cmdline.insert_str(¶m).map_err(Error::Cmdline)?;
895 }
896
897 if let Some(ramoops_region) = ramoops_region {
898 arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
899 .map_err(Error::Cmdline)?;
900 }
901
902 let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;
903
904 let pci_cfg = fdt::PciConfigRegion {
905 base: arch_memory_layout.pci_cam.start,
906 size: arch_memory_layout.pci_cam.len().unwrap(),
907 };
908
909 let mut pci_ranges: Vec<fdt::PciRange> = Vec::new();
910
911 let mut add_pci_ranges = |alloc: &AddressAllocator, prefetchable: bool| {
912 pci_ranges.extend(alloc.pools().iter().map(|range| fdt::PciRange {
913 space: fdt::PciAddressSpace::Memory64,
914 bus_address: range.start,
915 cpu_physical_address: range.start,
916 size: range.len().unwrap(),
917 prefetchable,
918 }));
919 };
920
921 add_pci_ranges(system_allocator.mmio_allocator(MmioType::Low), false);
922 add_pci_ranges(system_allocator.mmio_allocator(MmioType::High), true);
923
924 let (bat_control, bat_mmio_base_and_irq) = match bat_type {
925 Some(BatteryType::Goldfish) => {
926 let bat_irq = AARCH64_BAT_IRQ;
927
928 // a dummy AML buffer. Aarch64 crosvm doesn't use ACPI.
929 let mut amls = Vec::new();
930 let (control_tube, mmio_base) = arch::sys::linux::add_goldfish_battery(
931 &mut amls,
932 bat_jail,
933 &mmio_bus,
934 irq_chip.as_irq_chip_mut(),
935 bat_irq,
936 system_allocator,
937 #[cfg(feature = "swap")]
938 swap_controller,
939 )
940 .map_err(Error::CreateBatDevices)?;
941 (
942 Some(BatControl {
943 type_: BatteryType::Goldfish,
944 control_tube,
945 }),
946 Some((mmio_base, bat_irq)),
947 )
948 }
949 None => (None, None),
950 };
951
952 let vmwdt_cfg = fdt::VmWdtConfig {
953 base: AARCH64_VMWDT_ADDR,
954 size: AARCH64_VMWDT_SIZE,
955 clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
956 timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
957 };
958
959 fdt::create_fdt(
960 AARCH64_FDT_MAX_SIZE as usize,
961 &mem,
962 pci_irqs,
963 pci_cfg,
964 &pci_ranges,
965 dev_resources,
966 vcpu_count as u32,
967 &|n| get_vcpu_mpidr_aff(&vcpus, n),
968 components.cpu_clusters,
969 components.cpu_capacity,
970 components.cpu_frequencies,
971 fdt_address,
972 cmdline
973 .as_str_with_max_len(AARCH64_CMDLINE_MAX_SIZE - 1)
974 .map_err(Error::Cmdline)?,
975 payload.address_range(),
976 initrd,
977 components.android_fstab,
978 irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
979 use_pmu,
980 psci_version,
981 components.swiotlb.map(|size| {
982 (
983 get_swiotlb_addr(components.memory_size, size, vm.get_hypervisor()),
984 size,
985 )
986 }),
987 bat_mmio_base_and_irq,
988 vmwdt_cfg,
989 dump_device_tree_blob,
990 &|writer, phandles| vm.create_fdt(writer, phandles),
991 components.dynamic_power_coefficient,
992 device_tree_overlays,
993 &serial_devices,
994 components.virt_cpufreq_v2,
995 )
996 .map_err(Error::CreateFdt)?;
997
998 vm.init_arch(
999 payload.entry(),
1000 fdt_address,
1001 AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
1002 )
1003 .map_err(Error::InitVmError)?;
1004
1005 let vm_request_tubes = vec![vmwdt_host_tube, vcpufreq_host_tube];
1006
1007 Ok(RunnableLinuxVm {
1008 vm,
1009 vcpu_count,
1010 vcpus: Some(vcpus),
1011 vcpu_init,
1012 vcpu_affinity: components.vcpu_affinity,
1013 no_smt: components.no_smt,
1014 irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
1015 io_bus,
1016 mmio_bus,
1017 pid_debug_label_map,
1018 suspend_tube: (suspend_tube_send, suspend_tube_recv),
1019 rt_cpus: components.rt_cpus,
1020 delay_rt: components.delay_rt,
1021 bat_control,
1022 pm: None,
1023 resume_notify_devices: Vec::new(),
1024 root_config: pci_root,
1025 platform_devices,
1026 hotplug_bus: BTreeMap::new(),
1027 devices_thread: None,
1028 vm_request_tubes,
1029 })
1030 }
1031
configure_vcpu<V: Vm>( _vm: &V, _hypervisor: &dyn Hypervisor, _irq_chip: &mut dyn IrqChipAArch64, vcpu: &mut dyn VcpuAArch64, vcpu_init: VcpuInitAArch64, _vcpu_id: usize, _num_cpus: usize, _cpu_config: Option<CpuConfigAArch64>, ) -> std::result::Result<(), Self::Error>1032 fn configure_vcpu<V: Vm>(
1033 _vm: &V,
1034 _hypervisor: &dyn Hypervisor,
1035 _irq_chip: &mut dyn IrqChipAArch64,
1036 vcpu: &mut dyn VcpuAArch64,
1037 vcpu_init: VcpuInitAArch64,
1038 _vcpu_id: usize,
1039 _num_cpus: usize,
1040 _cpu_config: Option<CpuConfigAArch64>,
1041 ) -> std::result::Result<(), Self::Error> {
1042 for (reg, value) in vcpu_init.regs.iter() {
1043 vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
1044 }
1045 Ok(())
1046 }
1047
    /// PCI device hotplug entry point.
    ///
    /// Always fails with [`Error::Unsupported`] on AArch64: hotplug has not
    /// been verified on this architecture, so all PCI devices must be present
    /// at VM construction time.
    fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
        _linux: &mut RunnableLinuxVm<V, Vcpu>,
        _device: Box<dyn PciDevice>,
        _minijail: Option<Minijail>,
        _resources: &mut SystemAllocator,
        _tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
    ) -> std::result::Result<PciAddress, Self::Error> {
        // hotplug function isn't verified on AArch64, so set it unsupported here.
        Err(Error::Unsupported)
    }
1059
get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error>1060 fn get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1061 Ok(Self::collect_for_each_cpu(base::logical_core_max_freq_khz)
1062 .map_err(Error::CpuFrequencies)?
1063 .into_iter()
1064 .enumerate()
1065 .collect())
1066 }
1067
get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>1068 fn get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>
1069 {
1070 Ok(
1071 Self::collect_for_each_cpu(base::logical_core_frequencies_khz)
1072 .map_err(Error::CpuFrequencies)?
1073 .into_iter()
1074 .enumerate()
1075 .collect(),
1076 )
1077 }
1078
1079 // Returns a (cpu_id -> value) map of the DMIPS/MHz capacities of logical cores
1080 // in the host system.
get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error>1081 fn get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1082 Ok(Self::collect_for_each_cpu(base::logical_core_capacity)
1083 .map_err(Error::CpuTopology)?
1084 .into_iter()
1085 .enumerate()
1086 .collect())
1087 }
1088
1089 // Creates CPU cluster mask for each CPU in the host system.
get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error>1090 fn get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error> {
1091 let cluster_ids = Self::collect_for_each_cpu(base::logical_core_cluster_id)
1092 .map_err(Error::CpuTopology)?;
1093 let mut unique_clusters: Vec<CpuSet> = cluster_ids
1094 .iter()
1095 .map(|&vcpu_cluster_id| {
1096 cluster_ids
1097 .iter()
1098 .enumerate()
1099 .filter(|(_, &cpu_cluster_id)| vcpu_cluster_id == cpu_cluster_id)
1100 .map(|(cpu_id, _)| cpu_id)
1101 .collect()
1102 })
1103 .collect();
1104 unique_clusters.sort_unstable();
1105 unique_clusters.dedup();
1106 Ok(unique_clusters)
1107 }
1108 }
1109
1110 #[cfg(feature = "gdb")]
1111 impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
1112 type Error = Error;
1113
read_memory( _vcpu: &T, guest_mem: &GuestMemory, vaddr: GuestAddress, len: usize, ) -> Result<Vec<u8>>1114 fn read_memory(
1115 _vcpu: &T,
1116 guest_mem: &GuestMemory,
1117 vaddr: GuestAddress,
1118 len: usize,
1119 ) -> Result<Vec<u8>> {
1120 let mut buf = vec![0; len];
1121
1122 guest_mem
1123 .read_exact_at_addr(&mut buf, vaddr)
1124 .map_err(Error::ReadGuestMemory)?;
1125
1126 Ok(buf)
1127 }
1128
write_memory( _vcpu: &T, guest_mem: &GuestMemory, vaddr: GuestAddress, buf: &[u8], ) -> Result<()>1129 fn write_memory(
1130 _vcpu: &T,
1131 guest_mem: &GuestMemory,
1132 vaddr: GuestAddress,
1133 buf: &[u8],
1134 ) -> Result<()> {
1135 guest_mem
1136 .write_all_at_addr(buf, vaddr)
1137 .map_err(Error::WriteGuestMemory)
1138 }
1139
read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers>1140 fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
1141 let mut regs: <GdbArch as Arch>::Registers = Default::default();
1142 assert!(
1143 regs.x.len() == 31,
1144 "unexpected number of Xn general purpose registers"
1145 );
1146 for (i, reg) in regs.x.iter_mut().enumerate() {
1147 let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1148 *reg = vcpu
1149 .get_one_reg(VcpuRegAArch64::X(n))
1150 .map_err(Error::ReadReg)?;
1151 }
1152 regs.sp = vcpu
1153 .get_one_reg(VcpuRegAArch64::Sp)
1154 .map_err(Error::ReadReg)?;
1155 regs.pc = vcpu
1156 .get_one_reg(VcpuRegAArch64::Pc)
1157 .map_err(Error::ReadReg)?;
1158 // hypervisor API gives a 64-bit value for Pstate, but GDB wants a 32-bit "CPSR".
1159 regs.cpsr = vcpu
1160 .get_one_reg(VcpuRegAArch64::Pstate)
1161 .map_err(Error::ReadReg)? as u32;
1162 for (i, reg) in regs.v.iter_mut().enumerate() {
1163 let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1164 *reg = vcpu.get_vector_reg(n).map_err(Error::ReadReg)?;
1165 }
1166 regs.fpcr = vcpu
1167 .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPCR))
1168 .map_err(Error::ReadReg)? as u32;
1169 regs.fpsr = vcpu
1170 .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPSR))
1171 .map_err(Error::ReadReg)? as u32;
1172
1173 Ok(regs)
1174 }
1175
write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()>1176 fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
1177 assert!(
1178 regs.x.len() == 31,
1179 "unexpected number of Xn general purpose registers"
1180 );
1181 for (i, reg) in regs.x.iter().enumerate() {
1182 let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1183 vcpu.set_one_reg(VcpuRegAArch64::X(n), *reg)
1184 .map_err(Error::WriteReg)?;
1185 }
1186 vcpu.set_one_reg(VcpuRegAArch64::Sp, regs.sp)
1187 .map_err(Error::WriteReg)?;
1188 vcpu.set_one_reg(VcpuRegAArch64::Pc, regs.pc)
1189 .map_err(Error::WriteReg)?;
1190 // GDB gives a 32-bit value for "CPSR", but hypervisor API wants a 64-bit Pstate.
1191 let pstate = vcpu
1192 .get_one_reg(VcpuRegAArch64::Pstate)
1193 .map_err(Error::ReadReg)?;
1194 let pstate = (pstate & 0xffff_ffff_0000_0000) | (regs.cpsr as u64);
1195 vcpu.set_one_reg(VcpuRegAArch64::Pstate, pstate)
1196 .map_err(Error::WriteReg)?;
1197 for (i, reg) in regs.v.iter().enumerate() {
1198 let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1199 vcpu.set_vector_reg(n, *reg).map_err(Error::WriteReg)?;
1200 }
1201 vcpu.set_one_reg(
1202 VcpuRegAArch64::System(AArch64SysRegId::FPCR),
1203 u64::from(regs.fpcr),
1204 )
1205 .map_err(Error::WriteReg)?;
1206 vcpu.set_one_reg(
1207 VcpuRegAArch64::System(AArch64SysRegId::FPSR),
1208 u64::from(regs.fpsr),
1209 )
1210 .map_err(Error::WriteReg)?;
1211
1212 Ok(())
1213 }
1214
read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>>1215 fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
1216 let result = match reg_id {
1217 AArch64RegId::X(n) => vcpu
1218 .get_one_reg(VcpuRegAArch64::X(n))
1219 .map(|v| v.to_ne_bytes().to_vec()),
1220 AArch64RegId::Sp => vcpu
1221 .get_one_reg(VcpuRegAArch64::Sp)
1222 .map(|v| v.to_ne_bytes().to_vec()),
1223 AArch64RegId::Pc => vcpu
1224 .get_one_reg(VcpuRegAArch64::Pc)
1225 .map(|v| v.to_ne_bytes().to_vec()),
1226 AArch64RegId::Pstate => vcpu
1227 .get_one_reg(VcpuRegAArch64::Pstate)
1228 .map(|v| (v as u32).to_ne_bytes().to_vec()),
1229 AArch64RegId::V(n) => vcpu.get_vector_reg(n).map(|v| v.to_ne_bytes().to_vec()),
1230 AArch64RegId::System(op) => vcpu
1231 .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)))
1232 .map(|v| v.to_ne_bytes().to_vec()),
1233 _ => {
1234 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1235 Err(base::Error::new(libc::EINVAL))
1236 }
1237 };
1238
1239 match result {
1240 Ok(bytes) => Ok(bytes),
1241 // ENOENT is returned when KVM is aware of the register but it is unavailable
1242 Err(e) if e.errno() == libc::ENOENT => Ok(Vec::new()),
1243 Err(e) => Err(Error::ReadReg(e)),
1244 }
1245 }
1246
write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()>1247 fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
1248 fn try_into_u32(data: &[u8]) -> Result<u32> {
1249 let s = data
1250 .get(..4)
1251 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1252 let a = s
1253 .try_into()
1254 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1255 Ok(u32::from_ne_bytes(a))
1256 }
1257
1258 fn try_into_u64(data: &[u8]) -> Result<u64> {
1259 let s = data
1260 .get(..8)
1261 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1262 let a = s
1263 .try_into()
1264 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1265 Ok(u64::from_ne_bytes(a))
1266 }
1267
1268 fn try_into_u128(data: &[u8]) -> Result<u128> {
1269 let s = data
1270 .get(..16)
1271 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1272 let a = s
1273 .try_into()
1274 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1275 Ok(u128::from_ne_bytes(a))
1276 }
1277
1278 match reg_id {
1279 AArch64RegId::X(n) => vcpu.set_one_reg(VcpuRegAArch64::X(n), try_into_u64(data)?),
1280 AArch64RegId::Sp => vcpu.set_one_reg(VcpuRegAArch64::Sp, try_into_u64(data)?),
1281 AArch64RegId::Pc => vcpu.set_one_reg(VcpuRegAArch64::Pc, try_into_u64(data)?),
1282 AArch64RegId::Pstate => {
1283 vcpu.set_one_reg(VcpuRegAArch64::Pstate, u64::from(try_into_u32(data)?))
1284 }
1285 AArch64RegId::V(n) => vcpu.set_vector_reg(n, try_into_u128(data)?),
1286 AArch64RegId::System(op) => vcpu.set_one_reg(
1287 VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)),
1288 try_into_u64(data)?,
1289 ),
1290 _ => {
1291 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1292 Err(base::Error::new(libc::EINVAL))
1293 }
1294 }
1295 .map_err(Error::WriteReg)
1296 }
1297
enable_singlestep(vcpu: &T) -> Result<()>1298 fn enable_singlestep(vcpu: &T) -> Result<()> {
1299 const SINGLE_STEP: bool = true;
1300 vcpu.set_guest_debug(&[], SINGLE_STEP)
1301 .map_err(Error::EnableSinglestep)
1302 }
1303
get_max_hw_breakpoints(vcpu: &T) -> Result<usize>1304 fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
1305 vcpu.get_max_hw_bps().map_err(Error::GetMaxHwBreakPoint)
1306 }
1307
set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()>1308 fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
1309 const SINGLE_STEP: bool = false;
1310 vcpu.set_guest_debug(breakpoints, SINGLE_STEP)
1311 .map_err(Error::SetHwBreakpoint)
1312 }
1313 }
1314
impl AArch64 {
    /// This returns a base part of the kernel command for this architecture.
    ///
    /// `panic=-1` makes the guest kernel reboot immediately on panic instead
    /// of hanging.
    fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
        let mut cmdline = kernel_cmdline::Cmdline::new();
        cmdline.insert_str("panic=-1").unwrap();
        cmdline
    }

    /// This adds any early platform devices for this architecture.
    ///
    /// Registers the PL030 RTC and the vmwdt (VM watchdog) devices on `bus`
    /// and wires their edge-triggered interrupts into `irq_chip`.
    ///
    /// # Arguments
    ///
    /// * `irq_chip` - The IRQ chip to add irqs to.
    /// * `bus` - The bus to add devices to.
    /// * `vcpu_count` - The number of virtual CPUs for this guest VM
    /// * `vm_evt_wrtube` - The notification channel
    /// * `vmwdt_request_tube` - Tube the watchdog device uses to send requests.
    fn add_arch_devs(
        irq_chip: &mut dyn IrqChip,
        bus: &Bus,
        vcpu_count: usize,
        vm_evt_wrtube: &SendTube,
        vmwdt_request_tube: Tube,
    ) -> Result<()> {
        let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
        let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
        irq_chip
            .register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
            .map_err(Error::RegisterIrqfd)?;

        bus.insert(
            Arc::new(Mutex::new(rtc)),
            AARCH64_RTC_ADDR,
            AARCH64_RTC_SIZE,
        )
        .expect("failed to add rtc device");

        let vmwdt_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
        // TODO(review): this `unwrap()` on try_clone() diverges from the
        // Error::CloneEvent mapping used for the events above — consider
        // propagating the failure instead.
        let vm_wdt = devices::vmwdt::Vmwdt::new(
            vcpu_count,
            vm_evt_wrtube.try_clone().unwrap(),
            vmwdt_evt.try_clone().map_err(Error::CloneEvent)?,
            vmwdt_request_tube,
        )
        .map_err(Error::CreateVmwdtDevice)?;
        irq_chip
            .register_edge_irq_event(
                AARCH64_VMWDT_IRQ,
                &vmwdt_evt,
                IrqEventSource::from_device(&vm_wdt),
            )
            .map_err(Error::RegisterIrqfd)?;

        bus.insert(
            Arc::new(Mutex::new(vm_wdt)),
            AARCH64_VMWDT_ADDR,
            AARCH64_VMWDT_SIZE,
        )
        .expect("failed to add vmwdt device");

        Ok(())
    }

    /// Get ARM-specific features for vcpu with index `vcpu_id`.
    ///
    /// # Arguments
    ///
    /// * `vcpu_id` - The VM's index for `vcpu`.
    /// * `use_pmu` - Should `vcpu` be configured to use the Performance Monitor Unit.
    /// * `boot_cpu` - Index of the boot vcpu; all other vcpus start powered off.
    /// * `sve` - Scalable Vector Extension configuration.
    fn vcpu_features(
        vcpu_id: usize,
        use_pmu: bool,
        boot_cpu: usize,
        sve: SveConfig,
    ) -> Vec<VcpuFeature> {
        // Every vcpu gets PSCI v0.2 for guest power management calls.
        let mut features = vec![VcpuFeature::PsciV0_2];
        if use_pmu {
            features.push(VcpuFeature::PmuV3);
        }
        // Non-boot cpus are powered off initially
        if vcpu_id != boot_cpu {
            features.push(VcpuFeature::PowerOff);
        }
        if sve.enable {
            features.push(VcpuFeature::Sve);
        }

        features
    }

    /// Get initial register state for vcpu with index `vcpu_id`.
    ///
    /// Only the boot vcpu gets PC/X0-X2 set; secondary vcpus receive just the
    /// masked-interrupts Pstate and are brought up later via PSCI.
    ///
    /// # Arguments
    ///
    /// * `vcpu_id` - The VM's index for `vcpu`.
    /// * `payload` - The kernel or BIOS payload loaded into the guest.
    /// * `fdt_address` - Guest address of the flattened device tree.
    /// * `protection_type` - Determines who supplies the initial PC.
    /// * `boot_cpu` - Index of the vcpu that boots the guest.
    fn vcpu_init(
        vcpu_id: usize,
        payload: &PayloadType,
        fdt_address: GuestAddress,
        protection_type: ProtectionType,
        boot_cpu: usize,
    ) -> VcpuInitAArch64 {
        let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();

        // All interrupts masked
        let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
        regs.insert(VcpuRegAArch64::Pstate, pstate);

        // Other cpus are powered off initially
        if vcpu_id == boot_cpu {
            let entry_addr = if protection_type.needs_firmware_loaded() {
                Some(AARCH64_PROTECTED_VM_FW_START)
            } else if protection_type.runs_firmware() {
                None // Initial PC value is set by the hypervisor
            } else {
                Some(payload.entry().offset())
            };

            /* PC -- entry point */
            if let Some(entry) = entry_addr {
                regs.insert(VcpuRegAArch64::Pc, entry);
            }

            /* X0 -- fdt address */
            regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());

            if protection_type.runs_firmware() {
                /* X1 -- payload entry point */
                regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());

                /* X2 -- image size */
                regs.insert(VcpuRegAArch64::X(2), payload.size());
            }
        }

        VcpuInitAArch64 { regs }
    }

    /// Runs `func` for every logical core index on the host and collects the
    /// results, short-circuiting on the first error.
    fn collect_for_each_cpu<F, T>(func: F) -> std::result::Result<Vec<T>, base::Error>
    where
        F: Fn(usize) -> std::result::Result<T, base::Error>,
    {
        (0..base::number_of_logical_cores()?).map(func).collect()
    }
}
1459
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn vcpu_init_unprotected_kernel() {
        let payload = PayloadType::Kernel(LoadedKernel {
            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
            size: 0x1000,
            entry: GuestAddress(0x8080_0000),
        });
        assert_eq!(
            payload.address_range(),
            AddressRange {
                start: 0x8080_0000,
                end: 0x8080_0fff
            }
        );

        let init = AArch64::vcpu_init(
            0,
            &payload,
            GuestAddress(0x1234),
            ProtectionType::Unprotected,
            0,
        );

        // PC points at the kernel image entry point.
        assert_eq!(init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8080_0000));

        // X0 carries the FDT address.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
    }

    #[test]
    fn vcpu_init_unprotected_bios() {
        let payload = PayloadType::Bios {
            entry: GuestAddress(0x8020_0000),
            image_size: 0x1000,
        };
        assert_eq!(
            payload.address_range(),
            AddressRange {
                start: 0x8020_0000,
                end: 0x8020_0fff
            }
        );

        let init = AArch64::vcpu_init(
            0,
            &payload,
            GuestAddress(0x1234),
            ProtectionType::Unprotected,
            0,
        );

        // PC points at the BIOS image entry point.
        assert_eq!(init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8020_0000));

        // X0 carries the FDT address.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
    }

    #[test]
    fn vcpu_init_protected_kernel() {
        let payload = PayloadType::Kernel(LoadedKernel {
            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
            size: 0x1000,
            entry: GuestAddress(0x8080_0000),
        });
        assert_eq!(
            payload.address_range(),
            AddressRange {
                start: 0x8080_0000,
                end: 0x8080_0fff
            }
        );

        let init = AArch64::vcpu_init(
            0,
            &payload,
            GuestAddress(0x1234),
            ProtectionType::Protected,
            0,
        );

        // The hypervisor supplies the initial PC for protected VMs, so PC must
        // be absent from the vcpu_init register map.
        assert_eq!(init.regs.get(&VcpuRegAArch64::Pc), None);

        // X0 carries the FDT address.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));

        // X1 carries the kernel image entry point.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(1)), Some(&0x8080_0000));

        // X2 carries the image size.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(2)), Some(&0x1000));
    }
}
1551