1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! ARM 64-bit architecture support.
6 
7 #![cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8 
9 use std::collections::BTreeMap;
10 use std::fs::File;
11 use std::io;
12 use std::path::PathBuf;
13 use std::sync::atomic::AtomicU32;
14 use std::sync::mpsc;
15 use std::sync::Arc;
16 
17 use arch::get_serial_cmdline;
18 use arch::CpuSet;
19 use arch::DtbOverlay;
20 use arch::FdtPosition;
21 use arch::GetSerialCmdlineError;
22 use arch::MemoryRegionConfig;
23 use arch::RunnableLinuxVm;
24 use arch::SveConfig;
25 use arch::VcpuAffinity;
26 use arch::VmComponents;
27 use arch::VmImage;
28 use base::MemoryMappingBuilder;
29 use base::SendTube;
30 use base::Tube;
31 use devices::serial_device::SerialHardware;
32 use devices::serial_device::SerialParameters;
33 use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
34 use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
35 use devices::Bus;
36 use devices::BusDeviceObj;
37 use devices::BusError;
38 use devices::BusType;
39 use devices::IrqChip;
40 use devices::IrqChipAArch64;
41 use devices::IrqEventSource;
42 use devices::PciAddress;
43 use devices::PciConfigMmio;
44 use devices::PciDevice;
45 use devices::PciRootCommand;
46 use devices::Serial;
47 #[cfg(any(target_os = "android", target_os = "linux"))]
48 use devices::VirtCpufreq;
49 #[cfg(any(target_os = "android", target_os = "linux"))]
50 use devices::VirtCpufreqV2;
51 #[cfg(feature = "gdb")]
52 use gdbstub::arch::Arch;
53 #[cfg(feature = "gdb")]
54 use gdbstub_arch::aarch64::reg::id::AArch64RegId;
55 #[cfg(feature = "gdb")]
56 use gdbstub_arch::aarch64::AArch64 as GdbArch;
57 #[cfg(feature = "gdb")]
58 use hypervisor::AArch64SysRegId;
59 use hypervisor::CpuConfigAArch64;
60 use hypervisor::DeviceKind;
61 use hypervisor::Hypervisor;
62 use hypervisor::HypervisorCap;
63 use hypervisor::MemCacheType;
64 use hypervisor::ProtectionType;
65 use hypervisor::VcpuAArch64;
66 use hypervisor::VcpuFeature;
67 use hypervisor::VcpuInitAArch64;
68 use hypervisor::VcpuRegAArch64;
69 use hypervisor::Vm;
70 use hypervisor::VmAArch64;
71 use hypervisor::VmCap;
72 #[cfg(windows)]
73 use jail::FakeMinijailStub as Minijail;
74 use kernel_loader::LoadedKernel;
75 #[cfg(any(target_os = "android", target_os = "linux"))]
76 use minijail::Minijail;
77 use remain::sorted;
78 use resources::address_allocator::AddressAllocator;
79 use resources::AddressRange;
80 use resources::MmioType;
81 use resources::SystemAllocator;
82 use resources::SystemAllocatorConfig;
83 use sync::Condvar;
84 use sync::Mutex;
85 use thiserror::Error;
86 use vm_control::BatControl;
87 use vm_control::BatteryType;
88 use vm_memory::GuestAddress;
89 use vm_memory::GuestMemory;
90 use vm_memory::GuestMemoryError;
91 use vm_memory::MemoryRegionOptions;
92 use vm_memory::MemoryRegionPurpose;
93 
94 mod fdt;
95 
96 const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
97 const AARCH64_FDT_ALIGN: u64 = 0x200000;
98 const AARCH64_INITRD_ALIGN: u64 = 0x1000000;
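// (0x200000 = 2 MiB for the FDT, 0x1000000 = 16 MiB for the initrd.)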
99 
100 // Maximum Linux arm64 kernel command line size (arch/arm64/include/uapi/asm/setup.h).
101 const AARCH64_CMDLINE_MAX_SIZE: usize = 2048;
102 
103 // These constants indicate the address space used by the ARM vGIC.
104 const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
105 const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;
106 
107 // This indicates the start of DRAM inside the physical address space.
108 const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
109 const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x800000;
110 
111 const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
112 const AARCH64_PROTECTED_VM_FW_START: u64 =
113     AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;
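// With the defaults above, the pVM firmware occupies the 4 MiB immediately below RAM:
// 0x8000_0000 - 0x40_0000 = 0x7fc0_0000.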
114 
115 const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
116 const AARCH64_PVTIME_IPA_START: u64 = 0x1ff0000;
117 const AARCH64_PVTIME_SIZE: u64 = 64;
118 
119 // These constants indicate the placement of the GIC registers in the physical
120 // address space.
121 const AARCH64_GIC_DIST_BASE: u64 = 0x40000000 - AARCH64_GIC_DIST_SIZE;
122 const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
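// With the sizes above this works out to the distributor at 0x3fff_0000 and the CPU
// interface at 0x3ffd_0000, i.e. both just below 0x4000_0000.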
123 const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;
124 
125 // PSR (Processor State Register) bits
126 const PSR_MODE_EL1H: u64 = 0x00000005;
127 const PSR_F_BIT: u64 = 0x00000040;
128 const PSR_I_BIT: u64 = 0x00000080;
129 const PSR_A_BIT: u64 = 0x00000100;
130 const PSR_D_BIT: u64 = 0x00000200;
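// These bits are typically OR'd together (EL1h with D, A, I and F masked) to form the
// initial PSTATE of the boot vCPU; the exact value is set up in `vcpu_init` (not shown here).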
131 
132 // This was the speed kvmtool used, not sure if it matters.
133 const AARCH64_SERIAL_SPEED: u32 = 1843200;
134 // The serial device gets the first interrupt line,
135 // which gets mapped to the first SPI interrupt (physical 32).
136 const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
137 const AARCH64_SERIAL_2_4_IRQ: u32 = 2;
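// These IRQ numbers are relative to the first SPI (physical interrupt 32), so serial
// devices 1/3 use physical interrupt 32 and devices 2/4 use physical interrupt 34.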
138 
139 // Place the RTC device at page 2
140 const AARCH64_RTC_ADDR: u64 = 0x2000;
141 // The RTC device gets one 4k page
142 const AARCH64_RTC_SIZE: u64 = 0x1000;
143 // The RTC device gets the second interrupt line
144 const AARCH64_RTC_IRQ: u32 = 1;
145 
146 // The Goldfish battery device gets the 3rd interrupt line
147 const AARCH64_BAT_IRQ: u32 = 3;
148 
149 // Place the virtual watchdog device at page 3
150 const AARCH64_VMWDT_ADDR: u64 = 0x3000;
151 // The virtual watchdog device gets one 4k page
152 const AARCH64_VMWDT_SIZE: u64 = 0x1000;
153 
154 // Default PCI MMIO configuration region base address.
155 const AARCH64_PCI_CAM_BASE_DEFAULT: u64 = 0x10000;
156 // Default PCI MMIO configuration region size.
157 const AARCH64_PCI_CAM_SIZE_DEFAULT: u64 = 0x1000000;
158 // Default PCI mem base address.
159 const AARCH64_PCI_MEM_BASE_DEFAULT: u64 = 0x2000000;
160 // Default PCI mem size.
161 const AARCH64_PCI_MEM_SIZE_DEFAULT: u64 = 0x2000000;
162 // Virtio devices start at SPI interrupt number 4
163 const AARCH64_IRQ_BASE: u32 = 4;
164 
165 // Virtual CPU Frequency Device.
166 const AARCH64_VIRTFREQ_BASE: u64 = 0x1040000;
167 const AARCH64_VIRTFREQ_SIZE: u64 = 0x8;
168 const AARCH64_VIRTFREQ_MAXSIZE: u64 = 0x10000;
169 const AARCH64_VIRTFREQ_V2_SIZE: u64 = 0x1000;
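// Each vCPU gets its own register window starting at AARCH64_VIRTFREQ_BASE: 8 bytes per
// vCPU for v1 and 4 KiB per vCPU for v2, with the whole region capped at
// AARCH64_VIRTFREQ_MAXSIZE (see the per-vCPU registration in `build_vm` below).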
170 
171 // PMU PPI interrupt, same as QEMU.
172 const AARCH64_PMU_IRQ: u32 = 7;
173 
174 // VCPU stall detector interrupt
175 const AARCH64_VMWDT_IRQ: u32 = 15;
176 
177 enum PayloadType {
178     Bios {
179         entry: GuestAddress,
180         image_size: u64,
181     },
182     Kernel(LoadedKernel),
183 }
184 
185 impl PayloadType {
186     fn entry(&self) -> GuestAddress {
187         match self {
188             Self::Bios {
189                 entry,
190                 image_size: _,
191             } => *entry,
192             Self::Kernel(k) => k.entry,
193         }
194     }
195 
196     fn size(&self) -> u64 {
197         match self {
198             Self::Bios {
199                 entry: _,
200                 image_size,
201             } => *image_size,
202             Self::Kernel(k) => k.size,
203         }
204     }
205 
206     fn address_range(&self) -> AddressRange {
207         match self {
208             Self::Bios { entry, image_size } => {
209                 AddressRange::from_start_and_size(entry.offset(), *image_size)
210                     .expect("invalid BIOS address range")
211             }
212             Self::Kernel(k) => {
213                 // TODO: b/389759119: use `k.address_range` to include regions that are present in
214                 // memory but not in the original image file (e.g. `.bss` section).
215                 AddressRange::from_start_and_size(k.entry.offset(), k.size)
216                     .expect("invalid kernel address range")
217             }
218         }
219     }
220 }
221 
222 // When static swiotlb allocation is required, returns the address it should be allocated at.
223 // Otherwise, returns None.
224 fn get_swiotlb_addr(
225     memory_size: u64,
226     swiotlb_size: u64,
227     hypervisor: &(impl Hypervisor + ?Sized),
228 ) -> Option<GuestAddress> {
229     if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
230         Some(GuestAddress(
231             AARCH64_PHYS_MEM_START + memory_size - swiotlb_size,
232         ))
233     } else {
234         None
235     }
236 }
237 
238 #[sorted]
239 #[derive(Error, Debug)]
240 pub enum Error {
241     #[error("failed to allocate IRQ number")]
242     AllocateIrq,
243     #[error("bios could not be loaded: {0}")]
244     BiosLoadFailure(arch::LoadImageError),
245     #[error("failed to build arm pvtime memory: {0}")]
246     BuildPvtimeError(base::MmapError),
247     #[error("unable to clone an Event: {0}")]
248     CloneEvent(base::Error),
249     #[error("failed to clone IRQ chip: {0}")]
250     CloneIrqChip(base::Error),
251     #[error("the given kernel command line was invalid: {0}")]
252     Cmdline(kernel_cmdline::Error),
253     #[error("bad PCI CAM configuration: {0}")]
254     ConfigurePciCam(String),
255     #[error("bad PCI mem configuration: {0}")]
256     ConfigurePciMem(String),
257     #[error("failed to configure CPU Frequencies: {0}")]
258     CpuFrequencies(base::Error),
259     #[error("failed to configure CPU topology: {0}")]
260     CpuTopology(base::Error),
261     #[error("unable to create battery devices: {0}")]
262     CreateBatDevices(arch::DeviceRegistrationError),
263     #[error("unable to make an Event: {0}")]
264     CreateEvent(base::Error),
265     #[error("FDT could not be created: {0}")]
266     CreateFdt(cros_fdt::Error),
267     #[error("failed to create GIC: {0}")]
268     CreateGICFailure(base::Error),
269     #[error("failed to create a PCI root hub: {0}")]
270     CreatePciRoot(arch::DeviceRegistrationError),
271     #[error("failed to create platform bus: {0}")]
272     CreatePlatformBus(arch::DeviceRegistrationError),
273     #[error("unable to create serial devices: {0}")]
274     CreateSerialDevices(arch::DeviceRegistrationError),
275     #[error("failed to create socket: {0}")]
276     CreateSocket(io::Error),
277     #[error("failed to create tube: {0}")]
278     CreateTube(base::TubeError),
279     #[error("failed to create VCPU: {0}")]
280     CreateVcpu(base::Error),
281     #[error("unable to create vm watchdog timer device: {0}")]
282     CreateVmwdtDevice(anyhow::Error),
283     #[error("custom pVM firmware could not be loaded: {0}")]
284     CustomPvmFwLoadFailure(arch::LoadImageError),
285     #[error("vm created wrong kind of vcpu")]
286     DowncastVcpu,
287     #[error("failed to enable singlestep execution: {0}")]
288     EnableSinglestep(base::Error),
289     #[error("failed to finalize IRQ chip: {0}")]
290     FinalizeIrqChip(base::Error),
291     #[error("failed to get HW breakpoint count: {0}")]
292     GetMaxHwBreakPoint(base::Error),
293     #[error("failed to get PSCI version: {0}")]
294     GetPsciVersion(base::Error),
295     #[error("failed to get serial cmdline: {0}")]
296     GetSerialCmdline(GetSerialCmdlineError),
297     #[error("failed to initialize arm pvtime: {0}")]
298     InitPvtimeError(base::Error),
299     #[error("initrd could not be loaded: {0}")]
300     InitrdLoadFailure(arch::LoadImageError),
301     #[error("failed to initialize virtual machine {0}")]
302     InitVmError(base::Error),
303     #[error("kernel could not be loaded: {0}")]
304     KernelLoadFailure(kernel_loader::Error),
305     #[error("error loading Kernel from Elf image: {0}")]
306     LoadElfKernel(kernel_loader::Error),
307     #[error("failed to map arm pvtime memory: {0}")]
308     MapPvtimeError(base::Error),
309     #[error("pVM firmware could not be loaded: {0}")]
310     PvmFwLoadFailure(base::Error),
311     #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
312     RamoopsAddress(u64, u64),
313     #[error("error reading guest memory: {0}")]
314     ReadGuestMemory(vm_memory::GuestMemoryError),
315     #[error("error reading CPU register: {0}")]
316     ReadReg(base::Error),
317     #[error("error reading CPU registers: {0}")]
318     ReadRegs(base::Error),
319     #[error("failed to register irq fd: {0}")]
320     RegisterIrqfd(base::Error),
321     #[error("error registering PCI bus: {0}")]
322     RegisterPci(BusError),
323     #[error("error registering virtual cpufreq device: {0}")]
324     RegisterVirtCpufreq(BusError),
325     #[error("error registering virtual socket device: {0}")]
326     RegisterVsock(arch::DeviceRegistrationError),
327     #[error("failed to set device attr: {0}")]
328     SetDeviceAttr(base::Error),
329     #[error("failed to set a hardware breakpoint: {0}")]
330     SetHwBreakpoint(base::Error),
331     #[error("failed to set register: {0}")]
332     SetReg(base::Error),
333     #[error("failed to set up guest memory: {0}")]
334     SetupGuestMemory(GuestMemoryError),
335     #[error("this function isn't supported")]
336     Unsupported,
337     #[error("failed to initialize VCPU: {0}")]
338     VcpuInit(base::Error),
339     #[error("error writing guest memory: {0}")]
340     WriteGuestMemory(GuestMemoryError),
341     #[error("error writing CPU register: {0}")]
342     WriteReg(base::Error),
343     #[error("error writing CPU registers: {0}")]
344     WriteRegs(base::Error),
345 }
346 
347 pub type Result<T> = std::result::Result<T, Error>;
348 
349 fn load_kernel(
350     guest_mem: &GuestMemory,
351     kernel_start: GuestAddress,
352     mut kernel_image: &mut File,
353 ) -> Result<LoadedKernel> {
354     if let Ok(elf_kernel) = kernel_loader::load_elf(
355         guest_mem,
356         kernel_start,
357         &mut kernel_image,
358         AARCH64_PHYS_MEM_START,
359     ) {
360         return Ok(elf_kernel);
361     }
362 
363     if let Ok(lz4_kernel) =
364         kernel_loader::load_arm64_kernel_lz4(guest_mem, kernel_start, &mut kernel_image)
365     {
366         return Ok(lz4_kernel);
367     }
368 
369     kernel_loader::load_arm64_kernel(guest_mem, kernel_start, kernel_image)
370         .map_err(Error::KernelLoadFailure)
371 }
372 
373 pub struct AArch64;
374 
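/// Returns the "block" size implied by the host page size (one full page of 8-byte PTEs
/// worth of memory); `guest_memory_layout` uses this to align the main RAM region.
/// For example, with 4 KiB pages: (4096 / 8) * 4096 = 2 MiB.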
375 fn get_block_size() -> u64 {
376     let page_size = base::pagesize();
377     // Each page table entry (PTE) is 8 bytes, so a single page can hold
378     // (page_size / 8) entries.
379     let ptes_per_page = page_size / 8;
380     let block_size = page_size * ptes_per_page;
381 
382     block_size as u64
383 }
384 
385 fn get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64> {
386     const MPIDR_AFF_MASK: u64 = 0xff_00ff_ffff;
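    // The mask keeps only the affinity fields of MPIDR_EL1: Aff0..Aff2 in bits [23:0] and
    // Aff3 in bits [39:32].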
387 
388     Some(vcpus.get(index)?.get_mpidr().ok()? & MPIDR_AFF_MASK)
389 }
390 
391 fn main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64 {
392     // Static swiotlb is allocated from the end of RAM as a separate memory region, so, if
393     // enabled, make the RAM memory region smaller to leave room for it.
394     let mut main_memory_size = components.memory_size;
395     if let Some(size) = components.swiotlb {
396         if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
397             main_memory_size -= size;
398         }
399     }
400     main_memory_size
401 }
402 
403 pub struct ArchMemoryLayout {
404     pci_cam: AddressRange,
405     pci_mem: AddressRange,
406 }
407 
408 impl arch::LinuxArch for AArch64 {
409     type Error = Error;
410     type ArchMemoryLayout = ArchMemoryLayout;
411 
412     fn arch_memory_layout(
413         components: &VmComponents,
414     ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
415         let (pci_cam_start, pci_cam_size) = match components.pci_config.cam {
416             Some(MemoryRegionConfig { start, size }) => {
417                 (start, size.unwrap_or(AARCH64_PCI_CAM_SIZE_DEFAULT))
418             }
419             None => (AARCH64_PCI_CAM_BASE_DEFAULT, AARCH64_PCI_CAM_SIZE_DEFAULT),
420         };
421         // TODO: Make the PCI slot allocator aware of the CAM size so we can remove this check.
422         if pci_cam_size != AARCH64_PCI_CAM_SIZE_DEFAULT {
423             return Err(Error::ConfigurePciCam(format!(
424                 "PCI CAM size must be {AARCH64_PCI_CAM_SIZE_DEFAULT:#x}, got {pci_cam_size:#x}"
425             )));
426         }
427         let pci_cam = AddressRange::from_start_and_size(pci_cam_start, pci_cam_size).ok_or(
428             Error::ConfigurePciCam("PCI CAM region overflowed".to_string()),
429         )?;
430         if pci_cam.end >= AARCH64_PHYS_MEM_START {
431             return Err(Error::ConfigurePciCam(format!(
432                 "PCI CAM ({pci_cam:?}) must be before start of RAM ({AARCH64_PHYS_MEM_START:#x})"
433             )));
434         }
435 
436         let pci_mem = match components.pci_config.mem {
437             Some(MemoryRegionConfig { start, size }) => AddressRange::from_start_and_size(
438                 start,
439                 size.unwrap_or(AARCH64_PCI_MEM_SIZE_DEFAULT),
440             )
441             .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
442             None => AddressRange::from_start_and_size(
443                 AARCH64_PCI_MEM_BASE_DEFAULT,
444                 AARCH64_PCI_MEM_SIZE_DEFAULT,
445             )
446             .unwrap(),
447         };
448 
449         Ok(ArchMemoryLayout { pci_cam, pci_mem })
450     }
451 
452     /// Returns a Vec of the valid memory regions.
453     /// These should be used to configure the GuestMemory structure for the platform.
454     fn guest_memory_layout(
455         components: &VmComponents,
456         _arch_memory_layout: &Self::ArchMemoryLayout,
457         hypervisor: &impl Hypervisor,
458     ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
459         let main_memory_size = main_memory_size(components, hypervisor);
460 
461         let mut memory_regions = vec![(
462             GuestAddress(AARCH64_PHYS_MEM_START),
463             main_memory_size,
464             MemoryRegionOptions::new().align(get_block_size()),
465         )];
466 
467         // Allocate memory for the pVM firmware.
468         if components.hv_cfg.protection_type.runs_firmware() {
469             memory_regions.push((
470                 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
471                 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
472                 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
473             ));
474         }
475 
476         if let Some(size) = components.swiotlb {
477             if let Some(addr) = get_swiotlb_addr(components.memory_size, size, hypervisor) {
478                 memory_regions.push((
479                     addr,
480                     size,
481                     MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
482                 ));
483             }
484         }
485 
486         Ok(memory_regions)
487     }
488 
489     fn get_system_allocator_config<V: Vm>(
490         vm: &V,
491         arch_memory_layout: &Self::ArchMemoryLayout,
492     ) -> SystemAllocatorConfig {
493         let guest_phys_end = 1u64 << vm.get_guest_phys_addr_bits();
494         // The platform MMIO region is immediately past the end of RAM.
495         let plat_mmio_base = vm.get_memory().end_addr().offset();
496         let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
497         // The high MMIO region is the rest of the address space after the platform MMIO region.
498         let high_mmio_base = plat_mmio_base + plat_mmio_size;
499         let high_mmio_size = guest_phys_end
500             .checked_sub(high_mmio_base)
501             .unwrap_or_else(|| {
502                 panic!(
503                     "guest_phys_end {:#x} < high_mmio_base {:#x}",
504                     guest_phys_end, high_mmio_base,
505                 );
506             });
507         SystemAllocatorConfig {
508             io: None,
509             low_mmio: arch_memory_layout.pci_mem,
510             high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
511                 .expect("invalid high mmio region"),
512             platform_mmio: Some(
513                 AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
514                     .expect("invalid platform mmio region"),
515             ),
516             first_irq: AARCH64_IRQ_BASE,
517         }
518     }
519 
520     fn build_vm<V, Vcpu>(
521         mut components: VmComponents,
522         arch_memory_layout: &Self::ArchMemoryLayout,
523         _vm_evt_wrtube: &SendTube,
524         system_allocator: &mut SystemAllocator,
525         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
526         serial_jail: Option<Minijail>,
527         (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
528         mut vm: V,
529         ramoops_region: Option<arch::pstore::RamoopsRegion>,
530         devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
531         irq_chip: &mut dyn IrqChipAArch64,
532         vcpu_ids: &mut Vec<usize>,
533         dump_device_tree_blob: Option<PathBuf>,
534         _debugcon_jail: Option<Minijail>,
535         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
536         _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
537         device_tree_overlays: Vec<DtbOverlay>,
538         fdt_position: Option<FdtPosition>,
539         no_pmu: bool,
540     ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
541     where
542         V: VmAArch64,
543         Vcpu: VcpuAArch64,
544     {
545         let has_bios = matches!(components.vm_image, VmImage::Bios(_));
546         let mem = vm.get_memory().clone();
547 
548         let main_memory_size = main_memory_size(&components, vm.get_hypervisor());
549 
550         let fdt_position = fdt_position.unwrap_or(if has_bios {
551             FdtPosition::Start
552         } else {
553             FdtPosition::End
554         });
555         let payload_address = match fdt_position {
556             // If the FDT is at the start of RAM, the payload needs to go somewhere after it.
557             FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_MAX_SIZE),
558             // Otherwise, put the payload at the start of RAM.
559             FdtPosition::End | FdtPosition::AfterPayload => GuestAddress(AARCH64_PHYS_MEM_START),
560         };
561 
562         // Separate out image loading from other setup to get a specific error for
563         // image loading failures.
564         let mut initrd = None;
565         let (payload, payload_end_address) = match components.vm_image {
566             VmImage::Bios(ref mut bios) => {
567                 let image_size = arch::load_image(&mem, bios, payload_address, u64::MAX)
568                     .map_err(Error::BiosLoadFailure)?;
569                 (
570                     PayloadType::Bios {
571                         entry: payload_address,
572                         image_size: image_size as u64,
573                     },
574                     payload_address
575                         .checked_add(image_size.try_into().unwrap())
576                         .and_then(|end| end.checked_sub(1))
577                         .unwrap(),
578                 )
579             }
580             VmImage::Kernel(ref mut kernel_image) => {
581                 let loaded_kernel = load_kernel(&mem, payload_address, kernel_image)?;
582                 let kernel_end = loaded_kernel.address_range.end;
583                 let mut payload_end = GuestAddress(kernel_end);
584                 initrd = match components.initrd_image {
585                     Some(initrd_file) => {
586                         let mut initrd_file = initrd_file;
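                        // Round the first byte after the kernel up to the next
                        // AARCH64_INITRD_ALIGN (16 MiB) boundary.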
587                         let initrd_addr = (kernel_end + 1 + (AARCH64_INITRD_ALIGN - 1))
588                             & !(AARCH64_INITRD_ALIGN - 1);
589                         let initrd_max_size =
590                             main_memory_size.saturating_sub(initrd_addr - AARCH64_PHYS_MEM_START);
591                         let initrd_addr = GuestAddress(initrd_addr);
592                         let initrd_size =
593                             arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
594                                 .map_err(Error::InitrdLoadFailure)?;
595                         payload_end = initrd_addr
596                             .checked_add(initrd_size.try_into().unwrap())
597                             .and_then(|end| end.checked_sub(1))
598                             .unwrap();
599                         Some((initrd_addr, initrd_size))
600                     }
601                     None => None,
602                 };
603                 (PayloadType::Kernel(loaded_kernel), payload_end)
604             }
605         };
606 
607         let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + main_memory_size);
608 
609         let fdt_address = match fdt_position {
610             FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START),
611             FdtPosition::End => {
612                 let addr = memory_end
613                     .checked_sub(AARCH64_FDT_MAX_SIZE)
614                     .expect("Not enough memory for FDT")
615                     .align_down(AARCH64_FDT_ALIGN);
616                 assert!(addr > payload_end_address, "Not enough memory for FDT");
617                 addr
618             }
619             FdtPosition::AfterPayload => payload_end_address
620                 .checked_add(1)
621                 .and_then(|addr| addr.align(AARCH64_FDT_ALIGN))
622                 .expect("Not enough memory for FDT"),
623         };
624 
625         let mut use_pmu = vm
626             .get_hypervisor()
627             .check_capability(HypervisorCap::ArmPmuV3);
628         use_pmu &= !no_pmu;
629         let vcpu_count = components.vcpu_count;
630         let mut has_pvtime = true;
631         let mut vcpus = Vec::with_capacity(vcpu_count);
632         let mut vcpu_init = Vec::with_capacity(vcpu_count);
633         for vcpu_id in 0..vcpu_count {
634             let vcpu: Vcpu = *vm
635                 .create_vcpu(vcpu_id)
636                 .map_err(Error::CreateVcpu)?
637                 .downcast::<Vcpu>()
638                 .map_err(|_| Error::DowncastVcpu)?;
639             let per_vcpu_init = if vm
640                 .get_hypervisor()
641                 .check_capability(HypervisorCap::HypervisorInitializedBootContext)
642             {
643                 // No registers are initialized: VcpuInitAArch64.regs is an empty BTreeMap
644                 Default::default()
645             } else {
646                 Self::vcpu_init(
647                     vcpu_id,
648                     &payload,
649                     fdt_address,
650                     components.hv_cfg.protection_type,
651                     components.boot_cpu,
652                 )
653             };
654             has_pvtime &= vcpu.has_pvtime_support();
655             vcpus.push(vcpu);
656             vcpu_ids.push(vcpu_id);
657             vcpu_init.push(per_vcpu_init);
658         }
659 
660         if components.sve_config.auto {
661             components.sve_config.enable = vm.check_capability(VmCap::Sve);
662         }
663 
664         // Initialize Vcpus after all Vcpu objects have been created.
665         for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
666             let features =
667                 &Self::vcpu_features(vcpu_id, use_pmu, components.boot_cpu, components.sve_config);
668             vcpu.init(features).map_err(Error::VcpuInit)?;
669         }
670 
671         irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
672 
673         if has_pvtime {
674             let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
675                 .build()
676                 .map_err(Error::BuildPvtimeError)?;
677             vm.add_memory_region(
678                 GuestAddress(AARCH64_PVTIME_IPA_START),
679                 Box::new(pvtime_mem),
680                 false,
681                 false,
682                 MemCacheType::CacheCoherent,
683             )
684             .map_err(Error::MapPvtimeError)?;
685         }
686 
687         if components.hv_cfg.protection_type.needs_firmware_loaded() {
688             arch::load_image(
689                 &mem,
690                 &mut components
691                     .pvm_fw
692                     .expect("pvmfw must be available if ProtectionType loads it"),
693                 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
694                 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
695             )
696             .map_err(Error::CustomPvmFwLoadFailure)?;
697         } else if components.hv_cfg.protection_type.runs_firmware() {
698             // Tell the hypervisor to load the pVM firmware.
699             vm.load_protected_vm_firmware(
700                 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
701                 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
702             )
703             .map_err(Error::PvmFwLoadFailure)?;
704         }
705 
706         for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
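            // PPIs occupy interrupt IDs 16-31, so PPI 7 corresponds to interrupt ID 23 here.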
707             use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
708             if has_pvtime {
709                 vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
710                     .map_err(Error::InitPvtimeError)?;
711             }
712         }
713 
714         let mmio_bus = Arc::new(devices::Bus::new(BusType::Mmio));
715 
716         // ARM doesn't really use the io bus like x86, so just create an empty bus.
717         let io_bus = Arc::new(devices::Bus::new(BusType::Io));
718 
719         // Event used by PMDevice to notify crosvm that the
720         // guest OS is trying to suspend.
721         let (suspend_tube_send, suspend_tube_recv) =
722             Tube::directional_pair().map_err(Error::CreateTube)?;
723         let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));
724 
725         let (pci_devices, others): (Vec<_>, Vec<_>) = devs
726             .into_iter()
727             .partition(|(dev, _)| dev.as_pci_device().is_some());
728 
729         let pci_devices = pci_devices
730             .into_iter()
731             .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
732             .collect();
733         let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
734             arch::generate_pci_root(
735                 pci_devices,
736                 irq_chip.as_irq_chip_mut(),
737                 mmio_bus.clone(),
738                 GuestAddress(arch_memory_layout.pci_cam.start),
739                 8,
740                 io_bus.clone(),
741                 system_allocator,
742                 &mut vm,
743                 (devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
744                 None,
745                 #[cfg(feature = "swap")]
746                 swap_controller,
747             )
748             .map_err(Error::CreatePciRoot)?;
749 
750         let pci_root = Arc::new(Mutex::new(pci));
751         let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
752         let (platform_devices, _others): (Vec<_>, Vec<_>) = others
753             .into_iter()
754             .partition(|(dev, _)| dev.as_platform_device().is_some());
755 
756         let platform_devices = platform_devices
757             .into_iter()
758             .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
759             .collect();
760         let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
761             arch::sys::linux::generate_platform_bus(
762                 platform_devices,
763                 irq_chip.as_irq_chip_mut(),
764                 &mmio_bus,
765                 system_allocator,
766                 &mut vm,
767                 #[cfg(feature = "swap")]
768                 swap_controller,
769                 components.hv_cfg.protection_type,
770             )
771             .map_err(Error::CreatePlatformBus)?;
772         pid_debug_label_map.append(&mut platform_pid_debug_label_map);
773 
774         let (vmwdt_host_tube, vmwdt_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
775         Self::add_arch_devs(
776             irq_chip.as_irq_chip_mut(),
777             &mmio_bus,
778             vcpu_count,
779             _vm_evt_wrtube,
780             vmwdt_control_tube,
781         )?;
782 
783         let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
784         let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
785         let serial_devices = arch::add_serial_devices(
786             components.hv_cfg.protection_type,
787             &mmio_bus,
788             (AARCH64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
789             (AARCH64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
790             serial_parameters,
791             serial_jail,
792             #[cfg(feature = "swap")]
793             swap_controller,
794         )
795         .map_err(Error::CreateSerialDevices)?;
796 
797         let source = IrqEventSource {
798             device_id: Serial::device_id(),
799             queue_id: 0,
800             device_name: Serial::debug_label(),
801         };
802         irq_chip
803             .register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
804             .map_err(Error::RegisterIrqfd)?;
805         irq_chip
806             .register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
807             .map_err(Error::RegisterIrqfd)?;
808 
809         mmio_bus
810             .insert(
811                 pci_bus,
812                 arch_memory_layout.pci_cam.start,
813                 arch_memory_layout.pci_cam.len().unwrap(),
814             )
815             .map_err(Error::RegisterPci)?;
816 
817         let (vcpufreq_host_tube, vcpufreq_control_tube) =
818             Tube::pair().map_err(Error::CreateTube)?;
819         let vcpufreq_shared_tube = Arc::new(Mutex::new(vcpufreq_control_tube));
820         #[cfg(any(target_os = "android", target_os = "linux"))]
821         if !components.cpu_frequencies.is_empty() {
822             let mut freq_domain_vcpus: BTreeMap<u32, Vec<usize>> = BTreeMap::new();
823             let mut freq_domain_perfs: BTreeMap<u32, Arc<AtomicU32>> = BTreeMap::new();
824             let mut vcpu_affinities: Vec<u32> = Vec::new();
825             for vcpu in 0..vcpu_count {
826                 let freq_domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
827                 freq_domain_vcpus.entry(freq_domain).or_default().push(vcpu);
828                 let vcpu_affinity = match components.vcpu_affinity.clone() {
829                     Some(VcpuAffinity::Global(v)) => v,
830                     Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(),
831                     None => panic!("vcpu_affinity needs to be set for VirtCpufreq"),
832                 };
833                 vcpu_affinities.push(vcpu_affinity[0].try_into().unwrap());
834             }
835             for domain in freq_domain_vcpus.keys() {
836                 let domain_perf = Arc::new(AtomicU32::new(0));
837                 freq_domain_perfs.insert(*domain, domain_perf);
838             }
839             let largest_vcpu_affinity_idx = *vcpu_affinities.iter().max().unwrap() as usize;
840             for (vcpu, vcpu_affinity) in vcpu_affinities.iter().enumerate() {
841                 let mut virtfreq_size = AARCH64_VIRTFREQ_SIZE;
842                 if components.virt_cpufreq_v2 {
843                     let domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
844                     virtfreq_size = AARCH64_VIRTFREQ_V2_SIZE;
845                     let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreqV2::new(
846                         *vcpu_affinity,
847                         components.cpu_frequencies.get(&vcpu).unwrap().clone(),
848                         components.vcpu_domain_paths.get(&vcpu).cloned(),
849                         domain,
850                         *components.normalized_cpu_ipc_ratios.get(&vcpu).unwrap(),
851                         largest_vcpu_affinity_idx,
852                         vcpufreq_shared_tube.clone(),
853                         freq_domain_vcpus.get(&domain).unwrap().clone(),
854                         freq_domain_perfs.get(&domain).unwrap().clone(),
855                     )));
856                     mmio_bus
857                         .insert(
858                             virt_cpufreq,
859                             AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
860                             virtfreq_size,
861                         )
862                         .map_err(Error::RegisterVirtCpufreq)?;
863                 } else {
864                     let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new(
865                         *vcpu_affinity,
866                         *components.cpu_capacity.get(&vcpu).unwrap(),
867                         *components
868                             .cpu_frequencies
869                             .get(&vcpu)
870                             .unwrap()
871                             .iter()
872                             .max()
873                             .unwrap(),
874                     )));
875                     mmio_bus
876                         .insert(
877                             virt_cpufreq,
878                             AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
879                             virtfreq_size,
880                         )
881                         .map_err(Error::RegisterVirtCpufreq)?;
882                 }
883 
884                 if vcpu as u64 * AARCH64_VIRTFREQ_SIZE + virtfreq_size > AARCH64_VIRTFREQ_MAXSIZE {
885                     panic!("Exceeded maximum number of virt cpufreq devices");
886                 }
887             }
888         }
889 
890         let mut cmdline = Self::get_base_linux_cmdline();
891         get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
892             .map_err(Error::GetSerialCmdline)?;
893         for param in components.extra_kernel_params {
894             cmdline.insert_str(&param).map_err(Error::Cmdline)?;
895         }
896 
897         if let Some(ramoops_region) = ramoops_region {
898             arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
899                 .map_err(Error::Cmdline)?;
900         }
901 
902         let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;
903 
904         let pci_cfg = fdt::PciConfigRegion {
905             base: arch_memory_layout.pci_cam.start,
906             size: arch_memory_layout.pci_cam.len().unwrap(),
907         };
908 
909         let mut pci_ranges: Vec<fdt::PciRange> = Vec::new();
910 
911         let mut add_pci_ranges = |alloc: &AddressAllocator, prefetchable: bool| {
912             pci_ranges.extend(alloc.pools().iter().map(|range| fdt::PciRange {
913                 space: fdt::PciAddressSpace::Memory64,
914                 bus_address: range.start,
915                 cpu_physical_address: range.start,
916                 size: range.len().unwrap(),
917                 prefetchable,
918             }));
919         };
920 
921         add_pci_ranges(system_allocator.mmio_allocator(MmioType::Low), false);
922         add_pci_ranges(system_allocator.mmio_allocator(MmioType::High), true);
923 
924         let (bat_control, bat_mmio_base_and_irq) = match bat_type {
925             Some(BatteryType::Goldfish) => {
926                 let bat_irq = AARCH64_BAT_IRQ;
927 
928                 // A dummy AML buffer; AArch64 crosvm doesn't use ACPI.
929                 let mut amls = Vec::new();
930                 let (control_tube, mmio_base) = arch::sys::linux::add_goldfish_battery(
931                     &mut amls,
932                     bat_jail,
933                     &mmio_bus,
934                     irq_chip.as_irq_chip_mut(),
935                     bat_irq,
936                     system_allocator,
937                     #[cfg(feature = "swap")]
938                     swap_controller,
939                 )
940                 .map_err(Error::CreateBatDevices)?;
941                 (
942                     Some(BatControl {
943                         type_: BatteryType::Goldfish,
944                         control_tube,
945                     }),
946                     Some((mmio_base, bat_irq)),
947                 )
948             }
949             None => (None, None),
950         };
951 
952         let vmwdt_cfg = fdt::VmWdtConfig {
953             base: AARCH64_VMWDT_ADDR,
954             size: AARCH64_VMWDT_SIZE,
955             clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
956             timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
957         };
958 
959         fdt::create_fdt(
960             AARCH64_FDT_MAX_SIZE as usize,
961             &mem,
962             pci_irqs,
963             pci_cfg,
964             &pci_ranges,
965             dev_resources,
966             vcpu_count as u32,
967             &|n| get_vcpu_mpidr_aff(&vcpus, n),
968             components.cpu_clusters,
969             components.cpu_capacity,
970             components.cpu_frequencies,
971             fdt_address,
972             cmdline
973                 .as_str_with_max_len(AARCH64_CMDLINE_MAX_SIZE - 1)
974                 .map_err(Error::Cmdline)?,
975             payload.address_range(),
976             initrd,
977             components.android_fstab,
978             irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
979             use_pmu,
980             psci_version,
981             components.swiotlb.map(|size| {
982                 (
983                     get_swiotlb_addr(components.memory_size, size, vm.get_hypervisor()),
984                     size,
985                 )
986             }),
987             bat_mmio_base_and_irq,
988             vmwdt_cfg,
989             dump_device_tree_blob,
990             &|writer, phandles| vm.create_fdt(writer, phandles),
991             components.dynamic_power_coefficient,
992             device_tree_overlays,
993             &serial_devices,
994             components.virt_cpufreq_v2,
995         )
996         .map_err(Error::CreateFdt)?;
997 
998         vm.init_arch(
999             payload.entry(),
1000             fdt_address,
1001             AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
1002         )
1003         .map_err(Error::InitVmError)?;
1004 
1005         let vm_request_tubes = vec![vmwdt_host_tube, vcpufreq_host_tube];
1006 
1007         Ok(RunnableLinuxVm {
1008             vm,
1009             vcpu_count,
1010             vcpus: Some(vcpus),
1011             vcpu_init,
1012             vcpu_affinity: components.vcpu_affinity,
1013             no_smt: components.no_smt,
1014             irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
1015             io_bus,
1016             mmio_bus,
1017             pid_debug_label_map,
1018             suspend_tube: (suspend_tube_send, suspend_tube_recv),
1019             rt_cpus: components.rt_cpus,
1020             delay_rt: components.delay_rt,
1021             bat_control,
1022             pm: None,
1023             resume_notify_devices: Vec::new(),
1024             root_config: pci_root,
1025             platform_devices,
1026             hotplug_bus: BTreeMap::new(),
1027             devices_thread: None,
1028             vm_request_tubes,
1029         })
1030     }
1031 
1032     fn configure_vcpu<V: Vm>(
1033         _vm: &V,
1034         _hypervisor: &dyn Hypervisor,
1035         _irq_chip: &mut dyn IrqChipAArch64,
1036         vcpu: &mut dyn VcpuAArch64,
1037         vcpu_init: VcpuInitAArch64,
1038         _vcpu_id: usize,
1039         _num_cpus: usize,
1040         _cpu_config: Option<CpuConfigAArch64>,
1041     ) -> std::result::Result<(), Self::Error> {
1042         for (reg, value) in vcpu_init.regs.iter() {
1043             vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
1044         }
1045         Ok(())
1046     }
1047 
1048     fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
1049         _linux: &mut RunnableLinuxVm<V, Vcpu>,
1050         _device: Box<dyn PciDevice>,
1051         _minijail: Option<Minijail>,
1052         _resources: &mut SystemAllocator,
1053         _tube: &mpsc::Sender<PciRootCommand>,
1054         #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
1055     ) -> std::result::Result<PciAddress, Self::Error> {
1056         // PCI device hotplug isn't verified on AArch64, so report it as unsupported here.
1057         Err(Error::Unsupported)
1058     }
1059 
1060     fn get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1061         Ok(Self::collect_for_each_cpu(base::logical_core_max_freq_khz)
1062             .map_err(Error::CpuFrequencies)?
1063             .into_iter()
1064             .enumerate()
1065             .collect())
1066     }
1067 
1068     fn get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>
1069     {
1070         Ok(
1071             Self::collect_for_each_cpu(base::logical_core_frequencies_khz)
1072                 .map_err(Error::CpuFrequencies)?
1073                 .into_iter()
1074                 .enumerate()
1075                 .collect(),
1076         )
1077     }
1078 
1079     // Returns a (cpu_id -> value) map of the DMIPS/MHz capacities of logical cores
1080     // in the host system.
1081     fn get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1082         Ok(Self::collect_for_each_cpu(base::logical_core_capacity)
1083             .map_err(Error::CpuTopology)?
1084             .into_iter()
1085             .enumerate()
1086             .collect())
1087     }
1088 
1089     // Creates CPU cluster mask for each CPU in the host system.
1090     fn get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error> {
1091         let cluster_ids = Self::collect_for_each_cpu(base::logical_core_cluster_id)
1092             .map_err(Error::CpuTopology)?;
1093         let mut unique_clusters: Vec<CpuSet> = cluster_ids
1094             .iter()
1095             .map(|&vcpu_cluster_id| {
1096                 cluster_ids
1097                     .iter()
1098                     .enumerate()
1099                     .filter(|(_, &cpu_cluster_id)| vcpu_cluster_id == cpu_cluster_id)
1100                     .map(|(cpu_id, _)| cpu_id)
1101                     .collect()
1102             })
1103             .collect();
1104         unique_clusters.sort_unstable();
1105         unique_clusters.dedup();
1106         Ok(unique_clusters)
1107     }
1108 }
1109 
1110 #[cfg(feature = "gdb")]
1111 impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
1112     type Error = Error;
1113 
1114     fn read_memory(
1115         _vcpu: &T,
1116         guest_mem: &GuestMemory,
1117         vaddr: GuestAddress,
1118         len: usize,
1119     ) -> Result<Vec<u8>> {
1120         let mut buf = vec![0; len];
1121 
1122         guest_mem
1123             .read_exact_at_addr(&mut buf, vaddr)
1124             .map_err(Error::ReadGuestMemory)?;
1125 
1126         Ok(buf)
1127     }
1128 
1129     fn write_memory(
1130         _vcpu: &T,
1131         guest_mem: &GuestMemory,
1132         vaddr: GuestAddress,
1133         buf: &[u8],
1134     ) -> Result<()> {
1135         guest_mem
1136             .write_all_at_addr(buf, vaddr)
1137             .map_err(Error::WriteGuestMemory)
1138     }
1139 
1140     fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
1141         let mut regs: <GdbArch as Arch>::Registers = Default::default();
1142         assert!(
1143             regs.x.len() == 31,
1144             "unexpected number of Xn general purpose registers"
1145         );
1146         for (i, reg) in regs.x.iter_mut().enumerate() {
1147             let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1148             *reg = vcpu
1149                 .get_one_reg(VcpuRegAArch64::X(n))
1150                 .map_err(Error::ReadReg)?;
1151         }
1152         regs.sp = vcpu
1153             .get_one_reg(VcpuRegAArch64::Sp)
1154             .map_err(Error::ReadReg)?;
1155         regs.pc = vcpu
1156             .get_one_reg(VcpuRegAArch64::Pc)
1157             .map_err(Error::ReadReg)?;
1158         // The hypervisor API gives a 64-bit value for Pstate, but GDB wants a 32-bit "CPSR".
1159         regs.cpsr = vcpu
1160             .get_one_reg(VcpuRegAArch64::Pstate)
1161             .map_err(Error::ReadReg)? as u32;
1162         for (i, reg) in regs.v.iter_mut().enumerate() {
1163             let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1164             *reg = vcpu.get_vector_reg(n).map_err(Error::ReadReg)?;
1165         }
1166         regs.fpcr = vcpu
1167             .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPCR))
1168             .map_err(Error::ReadReg)? as u32;
1169         regs.fpsr = vcpu
1170             .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPSR))
1171             .map_err(Error::ReadReg)? as u32;
1172 
1173         Ok(regs)
1174     }
1175 
1176     fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
1177         assert!(
1178             regs.x.len() == 31,
1179             "unexpected number of Xn general purpose registers"
1180         );
1181         for (i, reg) in regs.x.iter().enumerate() {
1182             let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1183             vcpu.set_one_reg(VcpuRegAArch64::X(n), *reg)
1184                 .map_err(Error::WriteReg)?;
1185         }
1186         vcpu.set_one_reg(VcpuRegAArch64::Sp, regs.sp)
1187             .map_err(Error::WriteReg)?;
1188         vcpu.set_one_reg(VcpuRegAArch64::Pc, regs.pc)
1189             .map_err(Error::WriteReg)?;
1190         // GDB gives a 32-bit value for "CPSR", but the hypervisor API wants a 64-bit Pstate.
1191         let pstate = vcpu
1192             .get_one_reg(VcpuRegAArch64::Pstate)
1193             .map_err(Error::ReadReg)?;
1194         let pstate = (pstate & 0xffff_ffff_0000_0000) | (regs.cpsr as u64);
1195         vcpu.set_one_reg(VcpuRegAArch64::Pstate, pstate)
1196             .map_err(Error::WriteReg)?;
1197         for (i, reg) in regs.v.iter().enumerate() {
1198             let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1199             vcpu.set_vector_reg(n, *reg).map_err(Error::WriteReg)?;
1200         }
1201         vcpu.set_one_reg(
1202             VcpuRegAArch64::System(AArch64SysRegId::FPCR),
1203             u64::from(regs.fpcr),
1204         )
1205         .map_err(Error::WriteReg)?;
1206         vcpu.set_one_reg(
1207             VcpuRegAArch64::System(AArch64SysRegId::FPSR),
1208             u64::from(regs.fpsr),
1209         )
1210         .map_err(Error::WriteReg)?;
1211 
1212         Ok(())
1213     }
1214 
1215     fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
1216         let result = match reg_id {
1217             AArch64RegId::X(n) => vcpu
1218                 .get_one_reg(VcpuRegAArch64::X(n))
1219                 .map(|v| v.to_ne_bytes().to_vec()),
1220             AArch64RegId::Sp => vcpu
1221                 .get_one_reg(VcpuRegAArch64::Sp)
1222                 .map(|v| v.to_ne_bytes().to_vec()),
1223             AArch64RegId::Pc => vcpu
1224                 .get_one_reg(VcpuRegAArch64::Pc)
1225                 .map(|v| v.to_ne_bytes().to_vec()),
1226             AArch64RegId::Pstate => vcpu
1227                 .get_one_reg(VcpuRegAArch64::Pstate)
1228                 .map(|v| (v as u32).to_ne_bytes().to_vec()),
1229             AArch64RegId::V(n) => vcpu.get_vector_reg(n).map(|v| v.to_ne_bytes().to_vec()),
1230             AArch64RegId::System(op) => vcpu
1231                 .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)))
1232                 .map(|v| v.to_ne_bytes().to_vec()),
1233             _ => {
1234                 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1235                 Err(base::Error::new(libc::EINVAL))
1236             }
1237         };
1238 
1239         match result {
1240             Ok(bytes) => Ok(bytes),
1241             // ENOENT is returned when KVM is aware of the register but it is unavailable
1242             Err(e) if e.errno() == libc::ENOENT => Ok(Vec::new()),
1243             Err(e) => Err(Error::ReadReg(e)),
1244         }
1245     }
1246 
1247     fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
1248         fn try_into_u32(data: &[u8]) -> Result<u32> {
1249             let s = data
1250                 .get(..4)
1251                 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1252             let a = s
1253                 .try_into()
1254                 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1255             Ok(u32::from_ne_bytes(a))
1256         }
1257 
1258         fn try_into_u64(data: &[u8]) -> Result<u64> {
1259             let s = data
1260                 .get(..8)
1261                 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1262             let a = s
1263                 .try_into()
1264                 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1265             Ok(u64::from_ne_bytes(a))
1266         }
1267 
1268         fn try_into_u128(data: &[u8]) -> Result<u128> {
1269             let s = data
1270                 .get(..16)
1271                 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1272             let a = s
1273                 .try_into()
1274                 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1275             Ok(u128::from_ne_bytes(a))
1276         }
1277 
1278         match reg_id {
1279             AArch64RegId::X(n) => vcpu.set_one_reg(VcpuRegAArch64::X(n), try_into_u64(data)?),
1280             AArch64RegId::Sp => vcpu.set_one_reg(VcpuRegAArch64::Sp, try_into_u64(data)?),
1281             AArch64RegId::Pc => vcpu.set_one_reg(VcpuRegAArch64::Pc, try_into_u64(data)?),
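            // GDB writes CPSR as a 32-bit value; zero-extend it into the 64-bit Pstate register.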
1282             AArch64RegId::Pstate => {
1283                 vcpu.set_one_reg(VcpuRegAArch64::Pstate, u64::from(try_into_u32(data)?))
1284             }
1285             AArch64RegId::V(n) => vcpu.set_vector_reg(n, try_into_u128(data)?),
1286             AArch64RegId::System(op) => vcpu.set_one_reg(
1287                 VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)),
1288                 try_into_u64(data)?,
1289             ),
1290             _ => {
1291                 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1292                 Err(base::Error::new(libc::EINVAL))
1293             }
1294         }
1295         .map_err(Error::WriteReg)
1296     }
1297 
1298     fn enable_singlestep(vcpu: &T) -> Result<()> {
1299         const SINGLE_STEP: bool = true;
1300         vcpu.set_guest_debug(&[], SINGLE_STEP)
1301             .map_err(Error::EnableSinglestep)
1302     }
1303 
1304     fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
1305         vcpu.get_max_hw_bps().map_err(Error::GetMaxHwBreakPoint)
1306     }
1307 
1308     fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
1309         const SINGLE_STEP: bool = false;
1310         vcpu.set_guest_debug(breakpoints, SINGLE_STEP)
1311             .map_err(Error::SetHwBreakpoint)
1312     }
1313 }
1314 
1315 impl AArch64 {
1316     /// This returns the base part of the kernel command line for this architecture.
1317     fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
1318         let mut cmdline = kernel_cmdline::Cmdline::new();
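        // panic=-1 makes the guest kernel reboot immediately on a panic rather than hang.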
1319         cmdline.insert_str("panic=-1").unwrap();
1320         cmdline
1321     }
1322 
1323     /// This adds any early platform devices for this architecture.
1324     ///
1325     /// # Arguments
1326     ///
1327     /// * `irq_chip` - The IRQ chip to add irqs to.
1328     /// * `bus` - The bus to add devices to.
1329     /// * `vcpu_count` - The number of virtual CPUs for this guest VM.
1330     /// * `vm_evt_wrtube` - The tube used to send VM events (e.g. watchdog expirations).
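    /// * `vmwdt_request_tube` - The tube handed to the vmwdt device for its requests.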
1331     fn add_arch_devs(
1332         irq_chip: &mut dyn IrqChip,
1333         bus: &Bus,
1334         vcpu_count: usize,
1335         vm_evt_wrtube: &SendTube,
1336         vmwdt_request_tube: Tube,
1337     ) -> Result<()> {
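        // PL030 RTC: create its edge-triggered IRQ event and register it with the IRQ chip.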
1338         let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1339         let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
1340         irq_chip
1341             .register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
1342             .map_err(Error::RegisterIrqfd)?;
1343 
1344         bus.insert(
1345             Arc::new(Mutex::new(rtc)),
1346             AARCH64_RTC_ADDR,
1347             AARCH64_RTC_SIZE,
1348         )
1349         .expect("failed to add rtc device");
1350 
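        // Virtual watchdog (vmwdt): created with the vCPU count and reports expirations
        // through the cloned vm_evt_wrtube.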
1351         let vmwdt_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1352         let vm_wdt = devices::vmwdt::Vmwdt::new(
1353             vcpu_count,
1354             vm_evt_wrtube.try_clone().unwrap(),
1355             vmwdt_evt.try_clone().map_err(Error::CloneEvent)?,
1356             vmwdt_request_tube,
1357         )
1358         .map_err(Error::CreateVmwdtDevice)?;
1359         irq_chip
1360             .register_edge_irq_event(
1361                 AARCH64_VMWDT_IRQ,
1362                 &vmwdt_evt,
1363                 IrqEventSource::from_device(&vm_wdt),
1364             )
1365             .map_err(Error::RegisterIrqfd)?;
1366 
1367         bus.insert(
1368             Arc::new(Mutex::new(vm_wdt)),
1369             AARCH64_VMWDT_ADDR,
1370             AARCH64_VMWDT_SIZE,
1371         )
1372         .expect("failed to add vmwdt device");
1373 
1374         Ok(())
1375     }
1376 
1377     /// Get ARM-specific features for vcpu with index `vcpu_id`.
1378     ///
1379     /// # Arguments
1380     ///
1381     /// * `vcpu_id` - The VM's index for `vcpu`.
1382     /// * `use_pmu` - Whether `vcpu` should be configured to use the Performance Monitor Unit.
1383     fn vcpu_features(
1384         vcpu_id: usize,
1385         use_pmu: bool,
1386         boot_cpu: usize,
1387         sve: SveConfig,
1388     ) -> Vec<VcpuFeature> {
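        // PSCI v0.2 is always advertised so the guest can manage CPU power state.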
1389         let mut features = vec![VcpuFeature::PsciV0_2];
1390         if use_pmu {
1391             features.push(VcpuFeature::PmuV3);
1392         }
1393         // Non-boot cpus are powered off initially
1394         if vcpu_id != boot_cpu {
1395             features.push(VcpuFeature::PowerOff);
1396         }
1397         if sve.enable {
1398             features.push(VcpuFeature::Sve);
1399         }
1400 
1401         features
1402     }
1403 
1404     /// Get initial register state for vcpu with index `vcpu_id`.
1405     ///
1406     /// # Arguments
1407     ///
1408     /// * `vcpu_id` - The VM's index for `vcpu`.
1409     fn vcpu_init(
1410         vcpu_id: usize,
1411         payload: &PayloadType,
1412         fdt_address: GuestAddress,
1413         protection_type: ProtectionType,
1414         boot_cpu: usize,
1415     ) -> VcpuInitAArch64 {
1416         let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();
1417 
1418         // All interrupts masked
1419         let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
1420         regs.insert(VcpuRegAArch64::Pstate, pstate);
1421 
1422         // Other cpus are powered off initially
1423         if vcpu_id == boot_cpu {
1424             let entry_addr = if protection_type.needs_firmware_loaded() {
1425                 Some(AARCH64_PROTECTED_VM_FW_START)
1426             } else if protection_type.runs_firmware() {
1427                 None // Initial PC value is set by the hypervisor
1428             } else {
1429                 Some(payload.entry().offset())
1430             };
1431 
1432             /* PC -- entry point */
1433             if let Some(entry) = entry_addr {
1434                 regs.insert(VcpuRegAArch64::Pc, entry);
1435             }
1436 
1437             /* X0 -- fdt address */
1438             regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());
1439 
1440             if protection_type.runs_firmware() {
1441                 /* X1 -- payload entry point */
1442                 regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());
1443 
1444                 /* X2 -- image size */
1445                 regs.insert(VcpuRegAArch64::X(2), payload.size());
1446             }
1447         }
1448 
1449         VcpuInitAArch64 { regs }
1450     }
1451 
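    /// Runs `func` once for each logical core on the host and collects the results,
    /// returning the first error encountered.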
1452     fn collect_for_each_cpu<F, T>(func: F) -> std::result::Result<Vec<T>, base::Error>
1453     where
1454         F: Fn(usize) -> std::result::Result<T, base::Error>,
1455     {
1456         (0..base::number_of_logical_cores()?).map(func).collect()
1457     }
1458 }
1459 
1460 #[cfg(test)]
1461 mod tests {
1462     use super::*;
1463 
1464     #[test]
1465     fn vcpu_init_unprotected_kernel() {
1466         let payload = PayloadType::Kernel(LoadedKernel {
1467             address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
1468             size: 0x1000,
1469             entry: GuestAddress(0x8080_0000),
1470         });
1471         assert_eq!(
1472             payload.address_range(),
1473             AddressRange {
1474                 start: 0x8080_0000,
1475                 end: 0x8080_0fff
1476             }
1477         );
1478         let fdt_address = GuestAddress(0x1234);
1479         let prot = ProtectionType::Unprotected;
1480 
1481         let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1482 
1483         // PC: kernel image entry point
1484         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8080_0000));
1485 
1486         // X0: fdt_offset
1487         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1488     }
1489 
1490     #[test]
1491     fn vcpu_init_unprotected_bios() {
1492         let payload = PayloadType::Bios {
1493             entry: GuestAddress(0x8020_0000),
1494             image_size: 0x1000,
1495         };
1496         assert_eq!(
1497             payload.address_range(),
1498             AddressRange {
1499                 start: 0x8020_0000,
1500                 end: 0x8020_0fff
1501             }
1502         );
1503         let fdt_address = GuestAddress(0x1234);
1504         let prot = ProtectionType::Unprotected;
1505 
1506         let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1507 
1508         // PC: bios image entry point
1509         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8020_0000));
1510 
1511         // X0: fdt_offset
1512         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1513     }
1514 
1515     #[test]
1516     fn vcpu_init_protected_kernel() {
1517         let payload = PayloadType::Kernel(LoadedKernel {
1518             address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
1519             size: 0x1000,
1520             entry: GuestAddress(0x8080_0000),
1521         });
1522         assert_eq!(
1523             payload.address_range(),
1524             AddressRange {
1525                 start: 0x8080_0000,
1526                 end: 0x8080_0fff
1527             }
1528         );
1529         let fdt_address = GuestAddress(0x1234);
1530         let prot = ProtectionType::Protected;
1531 
1532         let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1533 
1534         // The hypervisor provides the initial value of PC, so PC should not be present in the
1535         // vcpu_init register map.
1536         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), None);
1537 
1538         // X0: fdt_offset
1539         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1540 
1541         // X1: kernel image entry point
1542         assert_eq!(
1543             vcpu_init.regs.get(&VcpuRegAArch64::X(1)),
1544             Some(&0x8080_0000)
1545         );
1546 
1547         // X2: image size
1548         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(2)), Some(&0x1000));
1549     }
1550 }
1551