1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! ARM 64-bit architecture support.
6
7 #![cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8
9 use std::collections::BTreeMap;
10 use std::io;
11 use std::path::PathBuf;
12 use std::sync::mpsc;
13 use std::sync::Arc;
14
15 use arch::get_serial_cmdline;
16 use arch::GetSerialCmdlineError;
17 use arch::MsrConfig;
18 use arch::MsrExitHandlerError;
19 use arch::RunnableLinuxVm;
20 use arch::VmComponents;
21 use arch::VmImage;
22 use base::Event;
23 use base::MemoryMappingBuilder;
24 use base::SendTube;
25 use devices::serial_device::SerialHardware;
26 use devices::serial_device::SerialParameters;
27 use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
28 use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
29 use devices::Bus;
30 use devices::BusDeviceObj;
31 use devices::BusError;
32 use devices::IrqChip;
33 use devices::IrqChipAArch64;
34 use devices::IrqEventSource;
35 use devices::PciAddress;
36 use devices::PciConfigMmio;
37 use devices::PciDevice;
38 use devices::PciRootCommand;
39 use devices::Serial;
40 #[cfg(all(target_arch = "aarch64", feature = "gdb"))]
41 use gdbstub::arch::Arch;
42 #[cfg(all(target_arch = "aarch64", feature = "gdb"))]
43 use gdbstub_arch::aarch64::AArch64 as GdbArch;
44 use hypervisor::CpuConfigAArch64;
45 use hypervisor::DeviceKind;
46 use hypervisor::Hypervisor;
47 use hypervisor::HypervisorCap;
48 use hypervisor::ProtectionType;
49 use hypervisor::VcpuAArch64;
50 use hypervisor::VcpuFeature;
51 use hypervisor::VcpuInitAArch64;
52 use hypervisor::VcpuRegAArch64;
53 use hypervisor::Vm;
54 use hypervisor::VmAArch64;
55 #[cfg(windows)]
56 use jail::FakeMinijailStub as Minijail;
57 use kernel_loader::LoadedKernel;
58 #[cfg(unix)]
59 use minijail::Minijail;
60 use remain::sorted;
61 use resources::AddressRange;
62 use resources::SystemAllocator;
63 use resources::SystemAllocatorConfig;
64 use sync::Mutex;
65 use thiserror::Error;
66 use vm_control::BatControl;
67 use vm_control::BatteryType;
68 use vm_memory::GuestAddress;
69 #[cfg(all(target_arch = "aarch64", feature = "gdb"))]
70 use vm_memory::GuestMemory;
71 use vm_memory::GuestMemoryError;
72 use vm_memory::MemoryRegionOptions;
73 use vm_memory::MemoryRegionPurpose;
74
75 mod fdt;
76
// Guest physical memory layout.
//
// Offset of the kernel image from the start of DRAM (`AARCH64_PHYS_MEM_START`); zero means the
// kernel is placed at the very beginning of DRAM.
const AARCH64_KERNEL_OFFSET: u64 = 0;
// Maximum size of the flattened device tree blob.
const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
// The initrd load address is rounded up to a multiple of this value.
const AARCH64_INITRD_ALIGN: u64 = 0x1000000;

// These constants indicate the address space used by the ARM vGIC.
const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;

// This indicates the start of DRAM inside the physical address space.
const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
// The GIC distributor registers end at this address (see AARCH64_GIC_DIST_BASE below).
const AARCH64_AXI_BASE: u64 = 0x40000000;
// Size of the platform MMIO region, which is placed immediately past the end of RAM.
const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x800000;

// FDT is placed at the front of RAM when booting in BIOS mode.
const AARCH64_FDT_OFFSET_IN_BIOS_MODE: u64 = 0x0;
// Therefore, the BIOS is placed after the FDT in memory.
const AARCH64_BIOS_OFFSET: u64 = AARCH64_FDT_MAX_SIZE;
// Maximum number of bytes loaded from a BIOS image.
const AARCH64_BIOS_MAX_LEN: u64 = 1 << 20;

// The protected VM firmware region ends exactly where DRAM starts.
const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
const AARCH64_PROTECTED_VM_FW_START: u64 =
    AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;

// The pvtime region ends exactly where the low MMIO region starts. Each vCPU is given its own
// AARCH64_PVTIME_SIZE-byte slot inside it, indexed by vCPU id.
const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
const AARCH64_PVTIME_IPA_START: u64 = AARCH64_MMIO_BASE - AARCH64_PVTIME_IPA_MAX_SIZE;
const AARCH64_PVTIME_SIZE: u64 = 64;

// These constants indicate the placement of the GIC registers in the physical
// address space.
const AARCH64_GIC_DIST_BASE: u64 = AARCH64_AXI_BASE - AARCH64_GIC_DIST_SIZE;
const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;

// PSR (Processor State Register) bits
const PSR_MODE_EL1H: u64 = 0x00000005;
const PSR_F_BIT: u64 = 0x00000040;
const PSR_I_BIT: u64 = 0x00000080;
const PSR_A_BIT: u64 = 0x00000100;
const PSR_D_BIT: u64 = 0x00000200;
117
/// The boot payload that was loaded into guest memory.
enum PayloadType {
    /// A BIOS image, described by the address it is entered at and the number of bytes loaded.
    Bios {
        entry: GuestAddress,
        image_size: u64,
    },
    /// A kernel image, described by the `LoadedKernel` returned from the kernel loader.
    Kernel(LoadedKernel),
}
125
126 impl PayloadType {
entry(&self) -> GuestAddress127 fn entry(&self) -> GuestAddress {
128 match self {
129 Self::Bios {
130 entry,
131 image_size: _,
132 } => *entry,
133 Self::Kernel(k) => k.entry,
134 }
135 }
136
size(&self) -> u64137 fn size(&self) -> u64 {
138 match self {
139 Self::Bios {
140 entry: _,
141 image_size,
142 } => *image_size,
143 Self::Kernel(k) => k.size,
144 }
145 }
146 }
147
/// Returns the guest address the kernel image is loaded at: the start of DRAM plus
/// `AARCH64_KERNEL_OFFSET`.
fn get_kernel_addr() -> GuestAddress {
    GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_KERNEL_OFFSET)
}
151
/// Returns the guest address the BIOS image is loaded at: the start of DRAM plus
/// `AARCH64_BIOS_OFFSET`, i.e. just past the space reserved for the FDT.
fn get_bios_addr() -> GuestAddress {
    GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_BIOS_OFFSET)
}
155
156 // When static swiotlb allocation is required, returns the address it should be allocated at.
157 // Otherwise, returns None.
get_swiotlb_addr( memory_size: u64, hypervisor: &(impl Hypervisor + ?Sized), ) -> Option<GuestAddress>158 fn get_swiotlb_addr(
159 memory_size: u64,
160 hypervisor: &(impl Hypervisor + ?Sized),
161 ) -> Option<GuestAddress> {
162 if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
163 Some(GuestAddress(AARCH64_PHYS_MEM_START + memory_size))
164 } else {
165 None
166 }
167 }
168
// Serial device requires 8 bytes of registers;
const AARCH64_SERIAL_SIZE: u64 = 0x8;
// This was the speed kvmtool used, not sure if it matters.
const AARCH64_SERIAL_SPEED: u32 = 1843200;
// The serial device gets the first interrupt line
// Which gets mapped to the first SPI interrupt (physical 32).
// One line is shared by serial ports 1 and 3, the other by serial ports 2 and 4.
const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
const AARCH64_SERIAL_2_4_IRQ: u32 = 2;

// Place the RTC device at page 2
const AARCH64_RTC_ADDR: u64 = 0x2000;
// The RTC device gets one 4k page
const AARCH64_RTC_SIZE: u64 = 0x1000;
// The RTC device gets the second interrupt line
const AARCH64_RTC_IRQ: u32 = 1;

// The Goldfish battery device gets interrupt line 3, i.e. the fourth line when counting the
// way the serial ("first" = 0) and RTC ("second" = 1) comments above do.
const AARCH64_BAT_IRQ: u32 = 3;

// Place the virtual watchdog device at page 3
const AARCH64_VMWDT_ADDR: u64 = 0x3000;
// The virtual watchdog device gets one 4k page
const AARCH64_VMWDT_SIZE: u64 = 0x1000;

// PCI MMIO configuration region base address.
const AARCH64_PCI_CFG_BASE: u64 = 0x10000;
// PCI MMIO configuration region size.
const AARCH64_PCI_CFG_SIZE: u64 = 0x1000000;
// This is the base address of MMIO devices.
const AARCH64_MMIO_BASE: u64 = 0x2000000;
// Size of the whole MMIO region.
const AARCH64_MMIO_SIZE: u64 = 0x2000000;
// Virtio devices start at SPI interrupt number 4. Lines 0-3 are taken by the serial, RTC and
// battery devices above, and this is the first IRQ handed to the system allocator.
const AARCH64_IRQ_BASE: u32 = 4;

// PMU PPI interrupt, same as qemu. The value passed to the vCPU's PMU setup is this number
// plus 16.
const AARCH64_PMU_IRQ: u32 = 7;
206
/// Errors produced by the AArch64 `arch::LinuxArch` and `arch::GdbOps` implementations in this
/// module.
///
/// Each variant wraps the underlying error (where there is one) and interpolates it into the
/// message. The `#[sorted]` attribute from the `remain` crate checks variant ordering, so new
/// variants must be inserted at their sorted position rather than appended.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    #[error("failed to allocate IRQ number")]
    AllocateIrq,
    #[error("bios could not be loaded: {0}")]
    BiosLoadFailure(arch::LoadImageError),
    #[error("failed to build arm pvtime memory: {0}")]
    BuildPvtimeError(base::MmapError),
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("unable to create battery devices: {0}")]
    CreateBatDevices(arch::DeviceRegistrationError),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create GIC: {0}")]
    CreateGICFailure(base::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("custom pVM firmware could not be loaded: {0}")]
    CustomPvmFwLoadFailure(arch::LoadImageError),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("failed to enable singlestep execution: {0}")]
    EnableSinglestep(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get HW breakpoint count: {0}")]
    GetMaxHwBreakPoint(base::Error),
    #[error("failed to get PSCI version: {0}")]
    GetPsciVersion(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("failed to initialize arm pvtime: {0}")]
    InitPvtimeError(base::Error),
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("failed to initialize virtual machine {0}")]
    InitVmError(base::Error),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(kernel_loader::Error),
    #[error("error loading Kernel from Elf image: {0}")]
    LoadElfKernel(kernel_loader::Error),
    #[error("failed to map arm pvtime memory: {0}")]
    MapPvtimeError(base::Error),
    #[error("pVM firmware could not be loaded: {0}")]
    PvmFwLoadFailure(base::Error),
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("error reading guest memory: {0}")]
    ReadGuestMemory(vm_memory::GuestMemoryError),
    #[error("error reading CPU register: {0}")]
    ReadReg(base::Error),
    #[error("error reading CPU registers: {0}")]
    ReadRegs(base::Error),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set a hardware breakpoint: {0}")]
    SetHwBreakpoint(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("failed to set up guest memory: {0}")]
    SetupGuestMemory(GuestMemoryError),
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
    #[error("error writing guest memory: {0}")]
    WriteGuestMemory(GuestMemoryError),
    #[error("error writing CPU register: {0}")]
    WriteReg(base::Error),
    #[error("error writing CPU registers: {0}")]
    WriteRegs(base::Error),
}
301
/// Shorthand for a `std::result::Result` whose error type is this module's `Error`.
pub type Result<T> = std::result::Result<T, Error>;
303
304 /// Returns the address in guest memory at which the FDT should be located.
fdt_address(memory_end: GuestAddress, has_bios: bool) -> GuestAddress305 fn fdt_address(memory_end: GuestAddress, has_bios: bool) -> GuestAddress {
306 // TODO(rammuthiah) make kernel and BIOS startup use FDT from the same location. ARCVM startup
307 // currently expects the kernel at 0x80080000 and the FDT at the end of RAM for unknown reasons.
308 // Root cause and figure out how to fold these code paths together.
309 if has_bios {
310 GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_OFFSET_IN_BIOS_MODE)
311 } else {
312 // Put fdt up near the top of memory
313 // TODO(sonnyrao): will have to handle this differently if there's
314 // > 4GB memory
315 memory_end
316 .checked_sub(AARCH64_FDT_MAX_SIZE)
317 .expect("Not enough memory for FDT")
318 .checked_sub(0x10000)
319 .expect("Not enough memory for FDT")
320 }
321 }
322
/// Data-less marker type that carries this module's `arch::LinuxArch` implementation and its
/// associated helper functions.
pub struct AArch64;
324
325 impl arch::LinuxArch for AArch64 {
326 type Error = Error;
327
328 /// Returns a Vec of the valid memory addresses.
329 /// These should be used to configure the GuestMemory structure for the platform.
guest_memory_layout( components: &VmComponents, hypervisor: &impl Hypervisor, ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>330 fn guest_memory_layout(
331 components: &VmComponents,
332 hypervisor: &impl Hypervisor,
333 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
334 let mut memory_regions = vec![(
335 GuestAddress(AARCH64_PHYS_MEM_START),
336 components.memory_size,
337 Default::default(),
338 )];
339
340 // Allocate memory for the pVM firmware.
341 if components.hv_cfg.protection_type.runs_firmware() {
342 memory_regions.push((
343 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
344 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
345 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
346 ));
347 }
348
349 if let Some(size) = components.swiotlb {
350 if let Some(addr) = get_swiotlb_addr(components.memory_size, hypervisor) {
351 memory_regions.push((
352 addr,
353 size,
354 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
355 ));
356 }
357 }
358
359 Ok(memory_regions)
360 }
361
get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig362 fn get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig {
363 Self::get_resource_allocator_config(
364 vm.get_memory().end_addr(),
365 vm.get_guest_phys_addr_bits(),
366 )
367 }
368
build_vm<V, Vcpu>( mut components: VmComponents, _vm_evt_wrtube: &SendTube, system_allocator: &mut SystemAllocator, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>), mut vm: V, ramoops_region: Option<arch::pstore::RamoopsRegion>, devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>, irq_chip: &mut dyn IrqChipAArch64, vcpu_ids: &mut Vec<usize>, dump_device_tree_blob: Option<PathBuf>, _debugcon_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error> where V: VmAArch64, Vcpu: VcpuAArch64,369 fn build_vm<V, Vcpu>(
370 mut components: VmComponents,
371 _vm_evt_wrtube: &SendTube,
372 system_allocator: &mut SystemAllocator,
373 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
374 serial_jail: Option<Minijail>,
375 (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
376 mut vm: V,
377 ramoops_region: Option<arch::pstore::RamoopsRegion>,
378 devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
379 irq_chip: &mut dyn IrqChipAArch64,
380 vcpu_ids: &mut Vec<usize>,
381 dump_device_tree_blob: Option<PathBuf>,
382 _debugcon_jail: Option<Minijail>,
383 #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
384 ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
385 where
386 V: VmAArch64,
387 Vcpu: VcpuAArch64,
388 {
389 let has_bios = matches!(components.vm_image, VmImage::Bios(_));
390 let mem = vm.get_memory().clone();
391
392 // separate out image loading from other setup to get a specific error for
393 // image loading
394 let mut initrd = None;
395 let payload = match components.vm_image {
396 VmImage::Bios(ref mut bios) => {
397 let image_size =
398 arch::load_image(&mem, bios, get_bios_addr(), AARCH64_BIOS_MAX_LEN)
399 .map_err(Error::BiosLoadFailure)?;
400 PayloadType::Bios {
401 entry: get_bios_addr(),
402 image_size: image_size as u64,
403 }
404 }
405 VmImage::Kernel(ref mut kernel_image) => {
406 let loaded_kernel = if let Ok(elf_kernel) = kernel_loader::load_elf(
407 &mem,
408 get_kernel_addr(),
409 kernel_image,
410 AARCH64_PHYS_MEM_START,
411 ) {
412 elf_kernel
413 } else {
414 kernel_loader::load_arm64_kernel(&mem, get_kernel_addr(), kernel_image)
415 .map_err(Error::KernelLoadFailure)?
416 };
417 let kernel_end = loaded_kernel.address_range.end;
418 initrd = match components.initrd_image {
419 Some(initrd_file) => {
420 let mut initrd_file = initrd_file;
421 let initrd_addr =
422 (kernel_end + (AARCH64_INITRD_ALIGN - 1)) & !(AARCH64_INITRD_ALIGN - 1);
423 let initrd_max_size =
424 components.memory_size - (initrd_addr - AARCH64_PHYS_MEM_START);
425 let initrd_addr = GuestAddress(initrd_addr);
426 let initrd_size =
427 arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
428 .map_err(Error::InitrdLoadFailure)?;
429 Some((initrd_addr, initrd_size))
430 }
431 None => None,
432 };
433 PayloadType::Kernel(loaded_kernel)
434 }
435 };
436
437 let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + components.memory_size);
438 let fdt_offset = fdt_address(memory_end, has_bios);
439
440 let mut use_pmu = vm
441 .get_hypervisor()
442 .check_capability(HypervisorCap::ArmPmuV3);
443 let vcpu_count = components.vcpu_count;
444 let mut has_pvtime = true;
445 let mut vcpus = Vec::with_capacity(vcpu_count);
446 let mut vcpu_init = Vec::with_capacity(vcpu_count);
447 for vcpu_id in 0..vcpu_count {
448 let vcpu: Vcpu = *vm
449 .create_vcpu(vcpu_id)
450 .map_err(Error::CreateVcpu)?
451 .downcast::<Vcpu>()
452 .map_err(|_| Error::DowncastVcpu)?;
453 let per_vcpu_init = if vm
454 .get_hypervisor()
455 .check_capability(HypervisorCap::HypervisorInitializedBootContext)
456 {
457 // No registers are initialized: VcpuInitAArch64.regs is an empty BTreeMap
458 Default::default()
459 } else {
460 Self::vcpu_init(
461 vcpu_id,
462 &payload,
463 fdt_offset,
464 components.hv_cfg.protection_type,
465 )
466 };
467 has_pvtime &= vcpu.has_pvtime_support();
468 vcpus.push(vcpu);
469 vcpu_ids.push(vcpu_id);
470 vcpu_init.push(per_vcpu_init);
471 }
472
473 // Initialize Vcpus after all Vcpu objects have been created.
474 for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
475 vcpu.init(&Self::vcpu_features(vcpu_id, use_pmu))
476 .map_err(Error::VcpuInit)?;
477 }
478
479 irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
480
481 if has_pvtime {
482 let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
483 .build()
484 .map_err(Error::BuildPvtimeError)?;
485 vm.add_memory_region(
486 GuestAddress(AARCH64_PVTIME_IPA_START),
487 Box::new(pvtime_mem),
488 false,
489 false,
490 )
491 .map_err(Error::MapPvtimeError)?;
492 }
493
494 if components.hv_cfg.protection_type.loads_firmware() {
495 arch::load_image(
496 &mem,
497 &mut components
498 .pvm_fw
499 .expect("pvmfw must be available if ProtectionType loads it"),
500 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
501 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
502 )
503 .map_err(Error::CustomPvmFwLoadFailure)?;
504 } else if components.hv_cfg.protection_type.runs_firmware() {
505 // Tell the hypervisor to load the pVM firmware.
506 vm.load_protected_vm_firmware(
507 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
508 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
509 )
510 .map_err(Error::PvmFwLoadFailure)?;
511 }
512
513 for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
514 use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
515 if has_pvtime {
516 vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
517 .map_err(Error::InitPvtimeError)?;
518 }
519 }
520
521 let mmio_bus = Arc::new(devices::Bus::new());
522
523 // ARM doesn't really use the io bus like x86, so just create an empty bus.
524 let io_bus = Arc::new(devices::Bus::new());
525
526 // Event used by PMDevice to notify crosvm that
527 // guest OS is trying to suspend.
528 let suspend_evt = Event::new().map_err(Error::CreateEvent)?;
529
530 let (pci_devices, others): (Vec<_>, Vec<_>) = devs
531 .into_iter()
532 .partition(|(dev, _)| dev.as_pci_device().is_some());
533
534 let pci_devices = pci_devices
535 .into_iter()
536 .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
537 .collect();
538 let (pci, pci_irqs, mut pid_debug_label_map, _amls) = arch::generate_pci_root(
539 pci_devices,
540 irq_chip.as_irq_chip_mut(),
541 mmio_bus.clone(),
542 io_bus.clone(),
543 system_allocator,
544 &mut vm,
545 (devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
546 None,
547 #[cfg(feature = "swap")]
548 swap_controller,
549 )
550 .map_err(Error::CreatePciRoot)?;
551
552 let pci_root = Arc::new(Mutex::new(pci));
553 let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
554 let (platform_devices, _others): (Vec<_>, Vec<_>) = others
555 .into_iter()
556 .partition(|(dev, _)| dev.as_platform_device().is_some());
557
558 let platform_devices = platform_devices
559 .into_iter()
560 .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
561 .collect();
562 let (platform_devices, mut platform_pid_debug_label_map) =
563 arch::sys::unix::generate_platform_bus(
564 platform_devices,
565 irq_chip.as_irq_chip_mut(),
566 &mmio_bus,
567 system_allocator,
568 #[cfg(feature = "swap")]
569 swap_controller,
570 )
571 .map_err(Error::CreatePlatformBus)?;
572 pid_debug_label_map.append(&mut platform_pid_debug_label_map);
573
574 Self::add_arch_devs(
575 irq_chip.as_irq_chip_mut(),
576 &mmio_bus,
577 vcpu_count,
578 _vm_evt_wrtube,
579 )?;
580
581 let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
582 let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
583 arch::add_serial_devices(
584 components.hv_cfg.protection_type,
585 &mmio_bus,
586 com_evt_1_3.get_trigger(),
587 com_evt_2_4.get_trigger(),
588 serial_parameters,
589 serial_jail,
590 #[cfg(feature = "swap")]
591 swap_controller,
592 )
593 .map_err(Error::CreateSerialDevices)?;
594
595 let source = IrqEventSource {
596 device_id: Serial::device_id(),
597 queue_id: 0,
598 device_name: Serial::debug_label(),
599 };
600 irq_chip
601 .register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
602 .map_err(Error::RegisterIrqfd)?;
603 irq_chip
604 .register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
605 .map_err(Error::RegisterIrqfd)?;
606
607 mmio_bus
608 .insert(pci_bus, AARCH64_PCI_CFG_BASE, AARCH64_PCI_CFG_SIZE)
609 .map_err(Error::RegisterPci)?;
610
611 let mut cmdline = Self::get_base_linux_cmdline();
612 get_serial_cmdline(&mut cmdline, serial_parameters, "mmio")
613 .map_err(Error::GetSerialCmdline)?;
614 for param in components.extra_kernel_params {
615 cmdline.insert_str(¶m).map_err(Error::Cmdline)?;
616 }
617
618 if let Some(ramoops_region) = ramoops_region {
619 arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
620 .map_err(Error::Cmdline)?;
621 }
622
623 let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;
624
625 let pci_cfg = fdt::PciConfigRegion {
626 base: AARCH64_PCI_CFG_BASE,
627 size: AARCH64_PCI_CFG_SIZE,
628 };
629
630 let pci_ranges: Vec<fdt::PciRange> = system_allocator
631 .mmio_pools()
632 .iter()
633 .map(|range| fdt::PciRange {
634 space: fdt::PciAddressSpace::Memory64,
635 bus_address: range.start,
636 cpu_physical_address: range.start,
637 size: range.len().unwrap(),
638 prefetchable: false,
639 })
640 .collect();
641
642 let (bat_control, bat_mmio_base_and_irq) = match bat_type {
643 Some(BatteryType::Goldfish) => {
644 let bat_irq = AARCH64_BAT_IRQ;
645
646 // a dummy AML buffer. Aarch64 crosvm doesn't use ACPI.
647 let mut amls = Vec::new();
648 let (control_tube, mmio_base) = arch::sys::unix::add_goldfish_battery(
649 &mut amls,
650 bat_jail,
651 &mmio_bus,
652 irq_chip.as_irq_chip_mut(),
653 bat_irq,
654 system_allocator,
655 #[cfg(feature = "swap")]
656 swap_controller,
657 )
658 .map_err(Error::CreateBatDevices)?;
659 (
660 Some(BatControl {
661 type_: BatteryType::Goldfish,
662 control_tube,
663 }),
664 Some((mmio_base, bat_irq)),
665 )
666 }
667 None => (None, None),
668 };
669
670 let vmwdt_cfg = fdt::VmWdtConfig {
671 base: AARCH64_VMWDT_ADDR,
672 size: AARCH64_VMWDT_SIZE,
673 clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
674 timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
675 };
676
677 fdt::create_fdt(
678 AARCH64_FDT_MAX_SIZE as usize,
679 &mem,
680 pci_irqs,
681 pci_cfg,
682 &pci_ranges,
683 vcpu_count as u32,
684 components.cpu_clusters,
685 components.cpu_capacity,
686 fdt_offset,
687 cmdline.as_str(),
688 (payload.entry(), payload.size() as usize),
689 initrd,
690 components.android_fstab,
691 irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
692 use_pmu,
693 psci_version,
694 components.swiotlb.map(|size| {
695 (
696 get_swiotlb_addr(components.memory_size, vm.get_hypervisor()),
697 size,
698 )
699 }),
700 bat_mmio_base_and_irq,
701 vmwdt_cfg,
702 dump_device_tree_blob,
703 &|writer, phandles| vm.create_fdt(writer, phandles),
704 )
705 .map_err(Error::CreateFdt)?;
706
707 vm.init_arch(
708 payload.entry(),
709 fdt_offset,
710 AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
711 )
712 .map_err(Error::InitVmError)?;
713
714 Ok(RunnableLinuxVm {
715 vm,
716 vcpu_count,
717 vcpus: Some(vcpus),
718 vcpu_init,
719 vcpu_affinity: components.vcpu_affinity,
720 no_smt: components.no_smt,
721 irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
722 has_bios,
723 io_bus,
724 mmio_bus,
725 pid_debug_label_map,
726 suspend_evt,
727 rt_cpus: components.rt_cpus,
728 delay_rt: components.delay_rt,
729 bat_control,
730 #[cfg(all(target_arch = "aarch64", feature = "gdb"))]
731 gdb: components.gdb,
732 pm: None,
733 resume_notify_devices: Vec::new(),
734 root_config: pci_root,
735 platform_devices,
736 hotplug_bus: BTreeMap::new(),
737 devices_thread: None,
738 vm_request_tube: None,
739 })
740 }
741
configure_vcpu<V: Vm>( _vm: &V, _hypervisor: &dyn Hypervisor, _irq_chip: &mut dyn IrqChipAArch64, vcpu: &mut dyn VcpuAArch64, vcpu_init: VcpuInitAArch64, _vcpu_id: usize, _num_cpus: usize, _has_bios: bool, _cpu_config: Option<CpuConfigAArch64>, ) -> std::result::Result<(), Self::Error>742 fn configure_vcpu<V: Vm>(
743 _vm: &V,
744 _hypervisor: &dyn Hypervisor,
745 _irq_chip: &mut dyn IrqChipAArch64,
746 vcpu: &mut dyn VcpuAArch64,
747 vcpu_init: VcpuInitAArch64,
748 _vcpu_id: usize,
749 _num_cpus: usize,
750 _has_bios: bool,
751 _cpu_config: Option<CpuConfigAArch64>,
752 ) -> std::result::Result<(), Self::Error> {
753 for (reg, value) in vcpu_init.regs.iter() {
754 vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
755 }
756 Ok(())
757 }
758
/// PCI hotplug entry point; not supported on this architecture.
///
/// Every argument is ignored and the call always fails with `Error::Unsupported`.
fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
    _linux: &mut RunnableLinuxVm<V, Vcpu>,
    _device: Box<dyn PciDevice>,
    _minijail: Option<Minijail>,
    _resources: &mut SystemAllocator,
    _tube: &mpsc::Sender<PciRootCommand>,
    #[cfg(feature = "swap")] _swap_controller: Option<&swap::SwapController>,
) -> std::result::Result<PciAddress, Self::Error> {
    // hotplug function isn't verified on AArch64, so set it unsupported here.
    Err(Error::Unsupported)
}
770 }
771
#[cfg(all(target_arch = "aarch64", feature = "gdb"))]
impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
    type Error = Error;

    /// Reads `len` bytes of guest memory starting at `vaddr`. The vCPU is not consulted.
    fn read_memory(
        _vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        len: usize,
    ) -> Result<Vec<u8>> {
        let mut data = vec![0; len];
        match guest_mem.read_exact_at_addr(&mut data, vaddr) {
            Ok(_) => Ok(data),
            Err(e) => Err(Error::ReadGuestMemory(e)),
        }
    }

    /// Writes all of `buf` into guest memory starting at `vaddr`. The vCPU is not consulted.
    fn write_memory(
        _vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        buf: &[u8],
    ) -> Result<()> {
        let outcome = guest_mem.write_all_at_addr(buf, vaddr);
        outcome.map_err(Error::WriteGuestMemory)
    }

    /// Fetches the full GDB register set from `vcpu`.
    fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
        let mut snapshot: <GdbArch as Arch>::Registers = Default::default();
        match vcpu.get_gdb_registers(&mut snapshot) {
            Ok(_) => Ok(snapshot),
            Err(e) => Err(Error::ReadRegs(e)),
        }
    }

    /// Stores the full GDB register set `regs` into `vcpu`.
    fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
        let outcome = vcpu.set_gdb_registers(regs);
        outcome.map_err(Error::WriteRegs)
    }

    /// Reads a single register and returns its raw bytes.
    fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
        // Start with a u128-sized buffer, then trim it to the length the vCPU reports.
        let mut bytes = vec![0; std::mem::size_of::<u128>()];
        let reported_len = vcpu
            .get_gdb_register(reg_id, &mut bytes[..])
            .map_err(Error::ReadReg)?;
        bytes.truncate(reported_len);
        Ok(bytes)
    }

    /// Writes the raw bytes `data` into a single register.
    fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
        let outcome = vcpu.set_gdb_register(reg_id, data);
        outcome.map_err(Error::WriteReg)
    }

    /// Turns on single-step execution, passing no hardware breakpoints.
    fn enable_singlestep(vcpu: &T) -> Result<()> {
        let single_step = true;
        vcpu.set_guest_debug(&[], single_step)
            .map_err(Error::EnableSinglestep)
    }

    /// Reports how many hardware breakpoints `vcpu` supports.
    fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
        let outcome = vcpu.get_max_hw_bps();
        outcome.map_err(Error::GetMaxHwBreakPoint)
    }

    /// Installs `breakpoints` as the hardware breakpoints, with single-stepping off.
    fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
        let single_step = false;
        vcpu.set_guest_debug(breakpoints, single_step)
            .map_err(Error::SetHwBreakpoint)
    }
}
843
844 impl AArch64 {
845 /// This returns a base part of the kernel command for this architecture
get_base_linux_cmdline() -> kernel_cmdline::Cmdline846 fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
847 let mut cmdline = kernel_cmdline::Cmdline::new(base::pagesize());
848 cmdline.insert_str("panic=-1").unwrap();
849 cmdline
850 }
851
852 /// Returns a system resource allocator configuration.
853 ///
854 /// # Arguments
855 ///
856 /// * `memory_end` - The first address beyond the end of guest memory.
857 /// * `guest_phys_addr_bits` - Size of guest physical addresses (IPA) in bits.
get_resource_allocator_config( memory_end: GuestAddress, guest_phys_addr_bits: u8, ) -> SystemAllocatorConfig858 fn get_resource_allocator_config(
859 memory_end: GuestAddress,
860 guest_phys_addr_bits: u8,
861 ) -> SystemAllocatorConfig {
862 let guest_phys_end = 1u64 << guest_phys_addr_bits;
863 // The platform MMIO region is immediately past the end of RAM.
864 let plat_mmio_base = memory_end.offset();
865 let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
866 // The high MMIO region is the rest of the address space after the platform MMIO region.
867 let high_mmio_base = plat_mmio_base + plat_mmio_size;
868 let high_mmio_size = guest_phys_end
869 .checked_sub(high_mmio_base)
870 .unwrap_or_else(|| {
871 panic!(
872 "guest_phys_end {:#x} < high_mmio_base {:#x}",
873 guest_phys_end, high_mmio_base,
874 );
875 });
876 SystemAllocatorConfig {
877 io: None,
878 low_mmio: AddressRange::from_start_and_size(AARCH64_MMIO_BASE, AARCH64_MMIO_SIZE)
879 .expect("invalid mmio region"),
880 high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
881 .expect("invalid high mmio region"),
882 platform_mmio: Some(
883 AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
884 .expect("invalid platform mmio region"),
885 ),
886 first_irq: AARCH64_IRQ_BASE,
887 }
888 }
889
890 /// This adds any early platform devices for this architecture.
891 ///
892 /// # Arguments
893 ///
894 /// * `irq_chip` - The IRQ chip to add irqs to.
895 /// * `bus` - The bus to add devices to.
896 /// * `vcpu_count` - The number of virtual CPUs for this guest VM
897 /// * `vm_evt_wrtube` - The notification channel
add_arch_devs( irq_chip: &mut dyn IrqChip, bus: &Bus, vcpu_count: usize, vm_evt_wrtube: &SendTube, ) -> Result<()>898 fn add_arch_devs(
899 irq_chip: &mut dyn IrqChip,
900 bus: &Bus,
901 vcpu_count: usize,
902 vm_evt_wrtube: &SendTube,
903 ) -> Result<()> {
904 let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
905 let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
906 irq_chip
907 .register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
908 .map_err(Error::RegisterIrqfd)?;
909
910 bus.insert(
911 Arc::new(Mutex::new(rtc)),
912 AARCH64_RTC_ADDR,
913 AARCH64_RTC_SIZE,
914 )
915 .expect("failed to add rtc device");
916
917 let vm_wdt = Arc::new(Mutex::new(
918 devices::vmwdt::Vmwdt::new(vcpu_count, vm_evt_wrtube.try_clone().unwrap()).unwrap(),
919 ));
920 bus.insert(vm_wdt, AARCH64_VMWDT_ADDR, AARCH64_VMWDT_SIZE)
921 .expect("failed to add vmwdt device");
922
923 Ok(())
924 }
925
926 /// Get ARM-specific features for vcpu with index `vcpu_id`.
927 ///
928 /// # Arguments
929 ///
930 /// * `vcpu_id` - The VM's index for `vcpu`.
931 /// * `use_pmu` - Should `vcpu` be configured to use the Performance Monitor Unit.
vcpu_features(vcpu_id: usize, use_pmu: bool) -> Vec<VcpuFeature>932 fn vcpu_features(vcpu_id: usize, use_pmu: bool) -> Vec<VcpuFeature> {
933 let mut features = vec![VcpuFeature::PsciV0_2];
934 if use_pmu {
935 features.push(VcpuFeature::PmuV3);
936 }
937 // Non-boot cpus are powered off initially
938 if vcpu_id != 0 {
939 features.push(VcpuFeature::PowerOff);
940 }
941
942 features
943 }
944
945 /// Get initial register state for vcpu with index `vcpu_id`.
946 ///
947 /// # Arguments
948 ///
949 /// * `vcpu_id` - The VM's index for `vcpu`.
vcpu_init( vcpu_id: usize, payload: &PayloadType, fdt_address: GuestAddress, protection_type: ProtectionType, ) -> VcpuInitAArch64950 fn vcpu_init(
951 vcpu_id: usize,
952 payload: &PayloadType,
953 fdt_address: GuestAddress,
954 protection_type: ProtectionType,
955 ) -> VcpuInitAArch64 {
956 let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();
957
958 // All interrupts masked
959 let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
960 regs.insert(VcpuRegAArch64::Pstate, pstate);
961
962 // Other cpus are powered off initially
963 if vcpu_id == 0 {
964 let entry_addr = if protection_type.loads_firmware() {
965 Some(AARCH64_PROTECTED_VM_FW_START)
966 } else if protection_type.runs_firmware() {
967 None // Initial PC value is set by the hypervisor
968 } else {
969 Some(payload.entry().offset())
970 };
971
972 /* PC -- entry point */
973 if let Some(entry) = entry_addr {
974 regs.insert(VcpuRegAArch64::Pc, entry);
975 }
976
977 /* X0 -- fdt address */
978 regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());
979
980 if protection_type.runs_firmware() {
981 /* X1 -- payload entry point */
982 regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());
983
984 /* X2 -- image size */
985 regs.insert(VcpuRegAArch64::X(2), payload.size());
986 }
987 }
988
989 VcpuInitAArch64 { regs }
990 }
991 }
992
993 pub struct MsrHandlers;
994
995 impl MsrHandlers {
new() -> Self996 pub fn new() -> Self {
997 Self {}
998 }
999
read(&self, _index: u32) -> Option<u64>1000 pub fn read(&self, _index: u32) -> Option<u64> {
1001 None
1002 }
1003
write(&self, _index: u32, _data: u64) -> Option<()>1004 pub fn write(&self, _index: u32, _data: u64) -> Option<()> {
1005 None
1006 }
1007
add_handler( &mut self, _index: u32, _msr_config: MsrConfig, _cpu_id: usize, ) -> std::result::Result<(), MsrExitHandlerError>1008 pub fn add_handler(
1009 &mut self,
1010 _index: u32,
1011 _msr_config: MsrConfig,
1012 _cpu_id: usize,
1013 ) -> std::result::Result<(), MsrExitHandlerError> {
1014 Ok(())
1015 }
1016 }
1017
#[cfg(test)]
mod tests {
    use super::*;

    /// Guest address passed to `vcpu_init` as the FDT location in every test.
    const FDT_ADDR: u64 = 0x1234;
    /// Start address and entry point of the fake kernel image.
    const KERNEL_ENTRY: u64 = 0x8080_0000;
    /// Entry point of the fake BIOS image.
    const BIOS_ENTRY: u64 = 0x8020_0000;

    /// Builds a 0x1000-byte kernel payload located at `KERNEL_ENTRY`.
    fn kernel_payload() -> PayloadType {
        PayloadType::Kernel(LoadedKernel {
            address_range: AddressRange::from_start_and_size(KERNEL_ENTRY, 0x1000).unwrap(),
            size: 0x1000,
            entry: GuestAddress(KERNEL_ENTRY),
        })
    }

    /// Runs `vcpu_init` for vcpu 0 with the shared FDT address.
    fn boot_vcpu_init(payload: &PayloadType, prot: ProtectionType) -> VcpuInitAArch64 {
        AArch64::vcpu_init(0, payload, GuestAddress(FDT_ADDR), prot)
    }

    /// Returns the initial value recorded for `reg`, or `None` when the register is absent.
    fn reg_value(init: &VcpuInitAArch64, reg: VcpuRegAArch64) -> Option<u64> {
        init.regs.get(&reg).copied()
    }

    #[test]
    fn vcpu_init_unprotected_kernel() {
        let init = boot_vcpu_init(&kernel_payload(), ProtectionType::Unprotected);

        // PC: kernel image entry point
        assert_eq!(reg_value(&init, VcpuRegAArch64::Pc), Some(KERNEL_ENTRY));

        // X0: fdt_offset
        assert_eq!(reg_value(&init, VcpuRegAArch64::X(0)), Some(FDT_ADDR));
    }

    #[test]
    fn vcpu_init_unprotected_bios() {
        let bios = PayloadType::Bios {
            entry: GuestAddress(BIOS_ENTRY),
            image_size: 0x1000,
        };
        let init = boot_vcpu_init(&bios, ProtectionType::Unprotected);

        // PC: bios image entry point
        assert_eq!(reg_value(&init, VcpuRegAArch64::Pc), Some(BIOS_ENTRY));

        // X0: fdt_offset
        assert_eq!(reg_value(&init, VcpuRegAArch64::X(0)), Some(FDT_ADDR));
    }

    #[test]
    fn vcpu_init_protected_kernel() {
        let init = boot_vcpu_init(&kernel_payload(), ProtectionType::Protected);

        // The hypervisor provides the initial value of PC, so PC should not be present in the
        // vcpu_init register map.
        assert_eq!(reg_value(&init, VcpuRegAArch64::Pc), None);

        // X0: fdt_offset
        assert_eq!(reg_value(&init, VcpuRegAArch64::X(0)), Some(FDT_ADDR));

        // X1: kernel image entry point
        assert_eq!(reg_value(&init, VcpuRegAArch64::X(1)), Some(KERNEL_ENTRY));

        // X2: image size
        assert_eq!(reg_value(&init, VcpuRegAArch64::X(2)), Some(0x1000));
    }
}
1088