1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::BTreeMap;
6 use std::collections::HashSet;
7 use std::fs::File;
8 use std::path::PathBuf;
9
10 use arch::apply_device_tree_overlays;
11 use arch::serial::SerialDeviceInfo;
12 use arch::CpuSet;
13 use arch::DtbOverlay;
14 #[cfg(any(target_os = "android", target_os = "linux"))]
15 use arch::PlatformBusResources;
16 use cros_fdt::Error;
17 use cros_fdt::Fdt;
18 use cros_fdt::Result;
19 // This is a Battery related constant
20 use devices::bat::GOLDFISHBAT_MMIO_LEN;
21 use devices::pl030::PL030_AMBA_ID;
22 use devices::IommuDevType;
23 use devices::PciAddress;
24 use devices::PciInterruptPin;
25 use hypervisor::PsciVersion;
26 use hypervisor::VmAArch64;
27 use hypervisor::PSCI_0_2;
28 use hypervisor::PSCI_1_0;
29 use rand::rngs::OsRng;
30 use rand::RngCore;
31 use vm_memory::GuestAddress;
32 use vm_memory::GuestMemory;
33
34 // These are GIC address-space location constants.
35 use crate::AARCH64_GIC_CPUI_BASE;
36 use crate::AARCH64_GIC_CPUI_SIZE;
37 use crate::AARCH64_GIC_DIST_BASE;
38 use crate::AARCH64_GIC_DIST_SIZE;
39 use crate::AARCH64_GIC_REDIST_SIZE;
40 use crate::AARCH64_PMU_IRQ;
41 use crate::AARCH64_PROTECTED_VM_FW_START;
42 // These are RTC related constants
43 use crate::AARCH64_RTC_ADDR;
44 use crate::AARCH64_RTC_IRQ;
45 use crate::AARCH64_RTC_SIZE;
46 // These are serial device related constants.
47 use crate::AARCH64_SERIAL_SPEED;
48 use crate::AARCH64_VIRTFREQ_BASE;
49 use crate::AARCH64_VIRTFREQ_SIZE;
50
// This is an arbitrary number to specify the node for the GIC.
// If we had a more complex interrupt architecture, then we'd need an enum for
// these.
const PHANDLE_GIC: u32 = 1;
// Phandle given to the restricted DMA pool node created under "reserved-memory".
const PHANDLE_RESTRICTED_DMA_POOL: u32 = 2;

// CPUs are assigned phandles starting with this number.
const PHANDLE_CPU0: u32 = 0x100;

// Per-CPU OPP tables are assigned phandles starting with this number.
const PHANDLE_OPP_DOMAIN_BASE: u32 = 0x1000;

// pKVM pvIOMMUs are assigned phandles starting with this number.
const PHANDLE_PKVM_PVIOMMU: u32 = 0x2000;

// These are specified by the Linux GIC bindings
// Value written to the GIC node's "#interrupt-cells" property.
const GIC_FDT_IRQ_NUM_CELLS: u32 = 3;
// First interrupt cell: selects a shared (SPI) or per-CPU (PPI) interrupt.
const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;
// Position and width of the CPU mask inside the third cell of a PPI specifier.
const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8;
const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << GIC_FDT_IRQ_PPI_CPU_SHIFT;
// Trigger flags OR-ed into (or used as) the third interrupt cell.
const IRQ_TYPE_EDGE_RISING: u32 = 0x00000001;
const IRQ_TYPE_LEVEL_HIGH: u32 = 0x00000004;
const IRQ_TYPE_LEVEL_LOW: u32 = 0x00000008;
74
create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()>75 fn create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()> {
76 let mut mem_reg_prop = Vec::new();
77 let mut previous_memory_region_end = None;
78 let mut regions = guest_mem.guest_memory_regions();
79 regions.sort();
80 for region in regions {
81 if region.0.offset() == AARCH64_PROTECTED_VM_FW_START {
82 continue;
83 }
84 // Merge with the previous region if possible.
85 if let Some(previous_end) = previous_memory_region_end {
86 if region.0 == previous_end {
87 *mem_reg_prop.last_mut().unwrap() += region.1 as u64;
88 previous_memory_region_end =
89 Some(previous_end.checked_add(region.1 as u64).unwrap());
90 continue;
91 }
92 assert!(region.0 > previous_end, "Memory regions overlap");
93 }
94
95 mem_reg_prop.push(region.0.offset());
96 mem_reg_prop.push(region.1 as u64);
97 previous_memory_region_end = Some(region.0.checked_add(region.1 as u64).unwrap());
98 }
99
100 let memory_node = fdt.root_mut().subnode_mut("memory")?;
101 memory_node.set_prop("device_type", "memory")?;
102 memory_node.set_prop("reg", mem_reg_prop)?;
103 Ok(())
104 }
105
create_resv_memory_node( fdt: &mut Fdt, resv_addr_and_size: (Option<GuestAddress>, u64), ) -> Result<u32>106 fn create_resv_memory_node(
107 fdt: &mut Fdt,
108 resv_addr_and_size: (Option<GuestAddress>, u64),
109 ) -> Result<u32> {
110 let (resv_addr, resv_size) = resv_addr_and_size;
111
112 let resv_memory_node = fdt.root_mut().subnode_mut("reserved-memory")?;
113 resv_memory_node.set_prop("#address-cells", 0x2u32)?;
114 resv_memory_node.set_prop("#size-cells", 0x2u32)?;
115 resv_memory_node.set_prop("ranges", ())?;
116
117 let restricted_dma_pool_node = if let Some(resv_addr) = resv_addr {
118 let node =
119 resv_memory_node.subnode_mut(&format!("restricted_dma_reserved@{:x}", resv_addr.0))?;
120 node.set_prop("reg", &[resv_addr.0, resv_size])?;
121 node
122 } else {
123 let node = resv_memory_node.subnode_mut("restricted_dma_reserved")?;
124 node.set_prop("size", resv_size)?;
125 node
126 };
127 restricted_dma_pool_node.set_prop("phandle", PHANDLE_RESTRICTED_DMA_POOL)?;
128 restricted_dma_pool_node.set_prop("compatible", "restricted-dma-pool")?;
129 restricted_dma_pool_node.set_prop("alignment", base::pagesize() as u64)?;
130 Ok(PHANDLE_RESTRICTED_DMA_POOL)
131 }
132
create_cpu_nodes( fdt: &mut Fdt, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, dynamic_power_coefficient: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, ) -> Result<()>133 fn create_cpu_nodes(
134 fdt: &mut Fdt,
135 num_cpus: u32,
136 cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
137 cpu_clusters: Vec<CpuSet>,
138 cpu_capacity: BTreeMap<usize, u32>,
139 dynamic_power_coefficient: BTreeMap<usize, u32>,
140 cpu_frequencies: BTreeMap<usize, Vec<u32>>,
141 ) -> Result<()> {
142 let root_node = fdt.root_mut();
143 let cpus_node = root_node.subnode_mut("cpus")?;
144 cpus_node.set_prop("#address-cells", 0x1u32)?;
145 cpus_node.set_prop("#size-cells", 0x0u32)?;
146
147 for cpu_id in 0..num_cpus {
148 let reg = u32::try_from(
149 cpu_mpidr_generator(cpu_id.try_into().unwrap()).ok_or(Error::PropertyValueInvalid)?,
150 )
151 .map_err(|_| Error::PropertyValueTooLarge)?;
152 let cpu_name = format!("cpu@{:x}", reg);
153 let cpu_node = cpus_node.subnode_mut(&cpu_name)?;
154 cpu_node.set_prop("device_type", "cpu")?;
155 cpu_node.set_prop("compatible", "arm,armv8")?;
156 if num_cpus > 1 {
157 cpu_node.set_prop("enable-method", "psci")?;
158 }
159 cpu_node.set_prop("reg", reg)?;
160 cpu_node.set_prop("phandle", PHANDLE_CPU0 + cpu_id)?;
161
162 if let Some(pwr_coefficient) = dynamic_power_coefficient.get(&(cpu_id as usize)) {
163 cpu_node.set_prop("dynamic-power-coefficient", *pwr_coefficient)?;
164 }
165 if let Some(capacity) = cpu_capacity.get(&(cpu_id as usize)) {
166 cpu_node.set_prop("capacity-dmips-mhz", *capacity)?;
167 }
168 // Placed inside cpu nodes for ease of parsing for some secure firmwares(PvmFw).
169 if let Some(frequencies) = cpu_frequencies.get(&(cpu_id as usize)) {
170 cpu_node.set_prop("operating-points-v2", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
171 let opp_table_node = cpu_node.subnode_mut(&format!("opp_table{}", cpu_id))?;
172 opp_table_node.set_prop("phandle", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
173 opp_table_node.set_prop("compatible", "operating-points-v2")?;
174 for freq in frequencies.iter() {
175 let opp_hz = (*freq) as u64 * 1000;
176 let opp_node = opp_table_node.subnode_mut(&format!("opp{}", opp_hz))?;
177 opp_node.set_prop("opp-hz", opp_hz)?;
178 }
179 }
180 }
181
182 if !cpu_clusters.is_empty() {
183 let cpu_map_node = cpus_node.subnode_mut("cpu-map")?;
184 for (cluster_idx, cpus) in cpu_clusters.iter().enumerate() {
185 let cluster_node = cpu_map_node.subnode_mut(&format!("cluster{}", cluster_idx))?;
186 for (core_idx, cpu_id) in cpus.iter().enumerate() {
187 let core_node = cluster_node.subnode_mut(&format!("core{}", core_idx))?;
188 core_node.set_prop("cpu", PHANDLE_CPU0 + *cpu_id as u32)?;
189 }
190 }
191 }
192
193 Ok(())
194 }
195
create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()>196 fn create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()> {
197 let mut gic_reg_prop = [AARCH64_GIC_DIST_BASE, AARCH64_GIC_DIST_SIZE, 0, 0];
198
199 let intc_node = fdt.root_mut().subnode_mut("intc")?;
200 if is_gicv3 {
201 intc_node.set_prop("compatible", "arm,gic-v3")?;
202 gic_reg_prop[2] = AARCH64_GIC_DIST_BASE - (AARCH64_GIC_REDIST_SIZE * num_cpus);
203 gic_reg_prop[3] = AARCH64_GIC_REDIST_SIZE * num_cpus;
204 } else {
205 intc_node.set_prop("compatible", "arm,cortex-a15-gic")?;
206 gic_reg_prop[2] = AARCH64_GIC_CPUI_BASE;
207 gic_reg_prop[3] = AARCH64_GIC_CPUI_SIZE;
208 }
209 intc_node.set_prop("#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS)?;
210 intc_node.set_prop("interrupt-controller", ())?;
211 intc_node.set_prop("reg", &gic_reg_prop)?;
212 intc_node.set_prop("phandle", PHANDLE_GIC)?;
213 intc_node.set_prop("#address-cells", 2u32)?;
214 intc_node.set_prop("#size-cells", 2u32)?;
215 add_symbols_entry(fdt, "intc", "/intc")?;
216 Ok(())
217 }
218
create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>219 fn create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
220 // These are fixed interrupt numbers for the timer device.
221 let irqs = [13, 14, 11, 10];
222 let compatible = "arm,armv8-timer";
223 let cpu_mask: u32 =
224 (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
225
226 let mut timer_reg_cells = Vec::new();
227 for &irq in &irqs {
228 timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI);
229 timer_reg_cells.push(irq);
230 timer_reg_cells.push(cpu_mask | IRQ_TYPE_LEVEL_LOW);
231 }
232
233 let timer_node = fdt.root_mut().subnode_mut("timer")?;
234 timer_node.set_prop("compatible", compatible)?;
235 timer_node.set_prop("interrupts", timer_reg_cells)?;
236 timer_node.set_prop("always-on", ())?;
237 Ok(())
238 }
239
create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()>240 fn create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()> {
241 let compatible = "virtual,android-v-only-cpufreq";
242 let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
243 let reg = [AARCH64_VIRTFREQ_BASE, AARCH64_VIRTFREQ_SIZE * num_cpus];
244
245 vcf_node.set_prop("compatible", compatible)?;
246 vcf_node.set_prop("reg", ®)?;
247 Ok(())
248 }
249
create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>250 fn create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
251 let compatible = "arm,armv8-pmuv3";
252 let cpu_mask: u32 =
253 (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
254 let irq = [
255 GIC_FDT_IRQ_TYPE_PPI,
256 AARCH64_PMU_IRQ,
257 cpu_mask | IRQ_TYPE_LEVEL_HIGH,
258 ];
259
260 let pmu_node = fdt.root_mut().subnode_mut("pmu")?;
261 pmu_node.set_prop("compatible", compatible)?;
262 pmu_node.set_prop("interrupts", &irq)?;
263 Ok(())
264 }
265
create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()>266 fn create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()> {
267 let serial_reg_prop = [addr, size];
268 let irq = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_EDGE_RISING];
269
270 let serial_node = fdt
271 .root_mut()
272 .subnode_mut(&format!("U6_16550A@{:x}", addr))?;
273 serial_node.set_prop("compatible", "ns16550a")?;
274 serial_node.set_prop("reg", &serial_reg_prop)?;
275 serial_node.set_prop("clock-frequency", AARCH64_SERIAL_SPEED)?;
276 serial_node.set_prop("interrupts", &irq)?;
277
278 Ok(())
279 }
280
create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()>281 fn create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()> {
282 for dev in serial_devices {
283 create_serial_node(fdt, dev.address, dev.size, dev.irq)?;
284 }
285
286 Ok(())
287 }
288
psci_compatible(version: &PsciVersion) -> Vec<&str>289 fn psci_compatible(version: &PsciVersion) -> Vec<&str> {
290 // The PSCI kernel driver only supports compatible strings for the following
291 // backward-compatible versions.
292 let supported = [(PSCI_1_0, "arm,psci-1.0"), (PSCI_0_2, "arm,psci-0.2")];
293
294 let mut compatible: Vec<_> = supported
295 .iter()
296 .filter(|&(v, _)| *version >= *v)
297 .map(|&(_, c)| c)
298 .collect();
299
300 // The PSCI kernel driver also supports PSCI v0.1, which is NOT forward-compatible.
301 if compatible.is_empty() {
302 compatible = vec!["arm,psci"];
303 }
304
305 compatible
306 }
307
create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()>308 fn create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()> {
309 let compatible = psci_compatible(version);
310 let psci_node = fdt.root_mut().subnode_mut("psci")?;
311 psci_node.set_prop("compatible", compatible.as_slice())?;
312 // Only support aarch64 guest
313 psci_node.set_prop("method", "hvc")?;
314 Ok(())
315 }
316
create_chosen_node( fdt: &mut Fdt, cmdline: &str, initrd: Option<(GuestAddress, usize)>, stdout_path: Option<&str>, ) -> Result<()>317 fn create_chosen_node(
318 fdt: &mut Fdt,
319 cmdline: &str,
320 initrd: Option<(GuestAddress, usize)>,
321 stdout_path: Option<&str>,
322 ) -> Result<()> {
323 let chosen_node = fdt.root_mut().subnode_mut("chosen")?;
324 chosen_node.set_prop("linux,pci-probe-only", 1u32)?;
325 chosen_node.set_prop("bootargs", cmdline)?;
326 if let Some(stdout_path) = stdout_path {
327 // Used by android bootloader for boot console output
328 chosen_node.set_prop("stdout-path", stdout_path)?;
329 }
330
331 let mut kaslr_seed_bytes = [0u8; 8];
332 OsRng.fill_bytes(&mut kaslr_seed_bytes);
333 let kaslr_seed = u64::from_le_bytes(kaslr_seed_bytes);
334 chosen_node.set_prop("kaslr-seed", kaslr_seed)?;
335
336 let mut rng_seed_bytes = [0u8; 256];
337 OsRng.fill_bytes(&mut rng_seed_bytes);
338 chosen_node.set_prop("rng-seed", &rng_seed_bytes)?;
339
340 if let Some((initrd_addr, initrd_size)) = initrd {
341 let initrd_start = initrd_addr.offset() as u32;
342 let initrd_end = initrd_start + initrd_size as u32;
343 chosen_node.set_prop("linux,initrd-start", initrd_start)?;
344 chosen_node.set_prop("linux,initrd-end", initrd_end)?;
345 }
346
347 Ok(())
348 }
349
create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()>350 fn create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()> {
351 let addr: u32 = addr
352 .offset()
353 .try_into()
354 .map_err(|_| Error::PropertyValueTooLarge)?;
355 let size: u32 = size.try_into().map_err(|_| Error::PropertyValueTooLarge)?;
356
357 let config_node = fdt.root_mut().subnode_mut("config")?;
358 config_node.set_prop("kernel-address", addr)?;
359 config_node.set_prop("kernel-size", size)?;
360 Ok(())
361 }
362
create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()>363 fn create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()> {
364 let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
365 vcf_node.set_prop("compatible", "virtual,kvm-cpufreq")?;
366 Ok(())
367 }
368
369 #[cfg(any(target_os = "android", target_os = "linux"))]
get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>>370 fn get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>> {
371 let mut ids = HashSet::new();
372
373 for res in platform_dev_resources {
374 for iommu in &res.iommus {
375 if let (IommuDevType::PkvmPviommu, Some(id), _) = iommu {
376 ids.insert(*id);
377 }
378 }
379 }
380
381 Ok(Vec::from_iter(ids))
382 }
383
create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32>384 fn create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32> {
385 let name = format!("pviommu{index}");
386 let phandle = PHANDLE_PKVM_PVIOMMU
387 .checked_add(index.try_into().unwrap())
388 .unwrap();
389
390 let iommu_node = fdt.root_mut().subnode_mut(&name)?;
391 iommu_node.set_prop("phandle", phandle)?;
392 iommu_node.set_prop("#iommu-cells", 1u32)?;
393 iommu_node.set_prop("compatible", "pkvm,pviommu")?;
394 iommu_node.set_prop("id", id)?;
395
396 Ok(phandle)
397 }
398
/// PCI host controller address range.
///
/// This represents a single entry in the "ranges" property for a PCI host controller.
///
/// See [PCI Bus Binding to Open Firmware](https://www.openfirmware.info/data/docs/bus.pci.pdf)
/// and <https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt>
/// for more information.
#[derive(Copy, Clone, Debug)]
pub struct PciRange {
    /// Address space the range belongs to.
    pub space: PciAddressSpace,
    /// Start of the range as seen on the PCI bus.
    pub bus_address: u64,
    /// Start of the range in CPU physical address space.
    pub cpu_physical_address: u64,
    /// Length of the range in bytes.
    pub size: u64,
    /// Whether the range is flagged as prefetchable.
    pub prefetchable: bool,
}

/// PCI address space.
///
/// The discriminants are the values placed in the space-code field of a "ranges" entry.
#[derive(Copy, Clone, Debug)]
// NOTE(review): presumably kept because not every variant is constructed by current
// callers - confirm before removing.
#[allow(dead_code)]
pub enum PciAddressSpace {
    /// PCI configuration space
    Configuration = 0b00,
    /// I/O space
    Io = 0b01,
    /// 32-bit memory space
    Memory = 0b10,
    /// 64-bit memory space
    Memory64 = 0b11,
}
428
/// Location of memory-mapped PCI configuration space.
#[derive(Copy, Clone, Debug)]
pub struct PciConfigRegion {
    /// Physical address of the base of the memory-mapped PCI configuration region.
    pub base: u64,
    /// Size of the PCI configuration region in bytes.
    pub size: u64,
}
437
/// Location and parameters of the memory-mapped vm watchdog.
#[derive(Copy, Clone, Debug)]
pub struct VmWdtConfig {
    /// Physical address of the base of the memory-mapped vm watchdog region.
    pub base: u64,
    /// Size of the vm watchdog region in bytes.
    pub size: u64,
    /// The internal clock frequency of the watchdog.
    pub clock_hz: u32,
    /// The expiration timeout measured in seconds.
    pub timeout_sec: u32,
}
450
create_pci_nodes( fdt: &mut Fdt, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, cfg: PciConfigRegion, ranges: &[PciRange], dma_pool_phandle: Option<u32>, ) -> Result<()>451 fn create_pci_nodes(
452 fdt: &mut Fdt,
453 pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
454 cfg: PciConfigRegion,
455 ranges: &[PciRange],
456 dma_pool_phandle: Option<u32>,
457 ) -> Result<()> {
458 // Add devicetree nodes describing a PCI generic host controller.
459 // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
460 // and "PCI Bus Binding to IEEE Std 1275-1994".
461 let ranges: Vec<u32> = ranges
462 .iter()
463 .flat_map(|r| {
464 let ss = r.space as u32;
465 let p = r.prefetchable as u32;
466 [
467 // BUS_ADDRESS(3) encoded as defined in OF PCI Bus Binding
468 (ss << 24) | (p << 30),
469 (r.bus_address >> 32) as u32,
470 r.bus_address as u32,
471 // CPU_PHYSICAL(2)
472 (r.cpu_physical_address >> 32) as u32,
473 r.cpu_physical_address as u32,
474 // SIZE(2)
475 (r.size >> 32) as u32,
476 r.size as u32,
477 ]
478 })
479 .collect();
480
481 let bus_range = [0u32, 0u32]; // Only bus 0
482 let reg = [cfg.base, cfg.size];
483
484 let mut interrupts: Vec<u32> = Vec::new();
485 let mut masks: Vec<u32> = Vec::new();
486
487 for (address, irq_num, irq_pin) in pci_irqs.iter() {
488 // PCI_DEVICE(3)
489 interrupts.push(address.to_config_address(0, 8));
490 interrupts.push(0);
491 interrupts.push(0);
492
493 // INT#(1)
494 interrupts.push(irq_pin.to_mask() + 1);
495
496 // CONTROLLER(PHANDLE)
497 interrupts.push(PHANDLE_GIC);
498 interrupts.push(0);
499 interrupts.push(0);
500
501 // CONTROLLER_DATA(3)
502 interrupts.push(GIC_FDT_IRQ_TYPE_SPI);
503 interrupts.push(*irq_num);
504 interrupts.push(IRQ_TYPE_LEVEL_HIGH);
505
506 // PCI_DEVICE(3)
507 masks.push(0xf800); // bits 11..15 (device)
508 masks.push(0);
509 masks.push(0);
510
511 // INT#(1)
512 masks.push(0x7); // allow INTA#-INTD# (1 | 2 | 3 | 4)
513 }
514
515 let pci_node = fdt.root_mut().subnode_mut("pci")?;
516 pci_node.set_prop("compatible", "pci-host-cam-generic")?;
517 pci_node.set_prop("device_type", "pci")?;
518 pci_node.set_prop("ranges", ranges)?;
519 pci_node.set_prop("bus-range", &bus_range)?;
520 pci_node.set_prop("#address-cells", 3u32)?;
521 pci_node.set_prop("#size-cells", 2u32)?;
522 pci_node.set_prop("reg", ®)?;
523 pci_node.set_prop("#interrupt-cells", 1u32)?;
524 pci_node.set_prop("interrupt-map", interrupts)?;
525 pci_node.set_prop("interrupt-map-mask", masks)?;
526 pci_node.set_prop("dma-coherent", ())?;
527 if let Some(dma_pool_phandle) = dma_pool_phandle {
528 pci_node.set_prop("memory-region", dma_pool_phandle)?;
529 }
530 Ok(())
531 }
532
create_rtc_node(fdt: &mut Fdt) -> Result<()>533 fn create_rtc_node(fdt: &mut Fdt) -> Result<()> {
534 // the kernel driver for pl030 really really wants a clock node
535 // associated with an AMBA device or it will fail to probe, so we
536 // need to make up a clock node to associate with the pl030 rtc
537 // node and an associated handle with a unique phandle value.
538 const CLK_PHANDLE: u32 = 24;
539 let clock_node = fdt.root_mut().subnode_mut("pclk@3M")?;
540 clock_node.set_prop("#clock-cells", 0u32)?;
541 clock_node.set_prop("compatible", "fixed-clock")?;
542 clock_node.set_prop("clock-frequency", 3141592u32)?;
543 clock_node.set_prop("phandle", CLK_PHANDLE)?;
544
545 let rtc_name = format!("rtc@{:x}", AARCH64_RTC_ADDR);
546 let reg = [AARCH64_RTC_ADDR, AARCH64_RTC_SIZE];
547 let irq = [GIC_FDT_IRQ_TYPE_SPI, AARCH64_RTC_IRQ, IRQ_TYPE_LEVEL_HIGH];
548
549 let rtc_node = fdt.root_mut().subnode_mut(&rtc_name)?;
550 rtc_node.set_prop("compatible", "arm,primecell")?;
551 rtc_node.set_prop("arm,primecell-periphid", PL030_AMBA_ID)?;
552 rtc_node.set_prop("reg", ®)?;
553 rtc_node.set_prop("interrupts", &irq)?;
554 rtc_node.set_prop("clocks", CLK_PHANDLE)?;
555 rtc_node.set_prop("clock-names", "apb_pclk")?;
556 Ok(())
557 }
558
559 /// Create a flattened device tree node for Goldfish Battery device.
560 ///
561 /// # Arguments
562 ///
563 /// * `fdt` - An Fdt in which the node is created
564 /// * `mmio_base` - The MMIO base address of the battery
565 /// * `irq` - The IRQ number of the battery
create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()>566 fn create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()> {
567 let reg = [mmio_base, GOLDFISHBAT_MMIO_LEN];
568 let irqs = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_LEVEL_HIGH];
569 let bat_node = fdt.root_mut().subnode_mut("goldfish_battery")?;
570 bat_node.set_prop("compatible", "google,goldfish-battery")?;
571 bat_node.set_prop("reg", ®)?;
572 bat_node.set_prop("interrupts", &irqs)?;
573 Ok(())
574 }
575
create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig) -> Result<()>576 fn create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig) -> Result<()> {
577 let vmwdt_name = format!("vmwdt@{:x}", vmwdt_cfg.base);
578 let reg = [vmwdt_cfg.base, vmwdt_cfg.size];
579 let vmwdt_node = fdt.root_mut().subnode_mut(&vmwdt_name)?;
580 vmwdt_node.set_prop("compatible", "qemu,vcpu-stall-detector")?;
581 vmwdt_node.set_prop("reg", ®)?;
582 vmwdt_node.set_prop("clock-frequency", vmwdt_cfg.clock_hz)?;
583 vmwdt_node.set_prop("timeout-sec", vmwdt_cfg.timeout_sec)?;
584 Ok(())
585 }
586
587 // Add a node path to __symbols__ node of the FDT, so it can be referenced by an overlay.
add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()>588 fn add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()> {
589 // Ensure the path points to a valid node with a defined phandle
590 let Some(target_node) = fdt.get_node(path) else {
591 return Err(Error::InvalidPath(format!("{path} does not exist")));
592 };
593 target_node
594 .get_prop::<u32>("phandle")
595 .or_else(|| target_node.get_prop("linux,phandle"))
596 .ok_or_else(|| Error::InvalidPath(format!("{path} must have a phandle")))?;
597 // Add the label -> path mapping.
598 let symbols_node = fdt.root_mut().subnode_mut("__symbols__")?;
599 symbols_node.set_prop(symbol, path)?;
600 Ok(())
601 }
602
603 /// Creates a flattened device tree containing all of the parameters for the
604 /// kernel and loads it into the guest memory at the specified offset.
605 ///
606 /// # Arguments
607 ///
608 /// * `fdt_max_size` - The amount of space reserved for the device tree
609 /// * `guest_mem` - The guest memory object
610 /// * `pci_irqs` - List of PCI device address to PCI interrupt number and pin mappings
611 /// * `pci_cfg` - Location of the memory-mapped PCI configuration space.
612 /// * `pci_ranges` - Memory ranges accessible via the PCI host controller.
613 /// * `num_cpus` - Number of virtual CPUs the guest will have
614 /// * `fdt_address` - The offset into physical memory for the device tree
615 /// * `cmdline` - The kernel commandline
616 /// * `initrd` - An optional tuple of initrd guest physical address and size
617 /// * `android_fstab` - An optional file holding Android fstab entries
618 /// * `is_gicv3` - True if gicv3, false if v2
619 /// * `psci_version` - the current PSCI version
620 /// * `swiotlb` - Reserve a memory pool for DMA. Tuple of base address and size.
621 /// * `bat_mmio_base_and_irq` - The battery base address and irq number
622 /// * `vmwdt_cfg` - The virtual watchdog configuration
623 /// * `dump_device_tree_blob` - Option path to write DTB to
624 /// * `vm_generator` - Callback to add additional nodes to DTB. create_vm uses Aarch64Vm::create_fdt
create_fdt( fdt_max_size: usize, guest_mem: &GuestMemory, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, pci_cfg: PciConfigRegion, pci_ranges: &[PciRange], #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec< PlatformBusResources, >, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, fdt_address: GuestAddress, cmdline: &str, image: (GuestAddress, usize), initrd: Option<(GuestAddress, usize)>, android_fstab: Option<File>, is_gicv3: bool, use_pmu: bool, psci_version: PsciVersion, swiotlb: Option<(Option<GuestAddress>, u64)>, bat_mmio_base_and_irq: Option<(u64, u32)>, vmwdt_cfg: VmWdtConfig, dump_device_tree_blob: Option<PathBuf>, vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>, dynamic_power_coefficient: BTreeMap<usize, u32>, device_tree_overlays: Vec<DtbOverlay>, serial_devices: &[SerialDeviceInfo], ) -> Result<()>625 pub fn create_fdt(
626 fdt_max_size: usize,
627 guest_mem: &GuestMemory,
628 pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
629 pci_cfg: PciConfigRegion,
630 pci_ranges: &[PciRange],
631 #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec<
632 PlatformBusResources,
633 >,
634 num_cpus: u32,
635 cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
636 cpu_clusters: Vec<CpuSet>,
637 cpu_capacity: BTreeMap<usize, u32>,
638 cpu_frequencies: BTreeMap<usize, Vec<u32>>,
639 fdt_address: GuestAddress,
640 cmdline: &str,
641 image: (GuestAddress, usize),
642 initrd: Option<(GuestAddress, usize)>,
643 android_fstab: Option<File>,
644 is_gicv3: bool,
645 use_pmu: bool,
646 psci_version: PsciVersion,
647 swiotlb: Option<(Option<GuestAddress>, u64)>,
648 bat_mmio_base_and_irq: Option<(u64, u32)>,
649 vmwdt_cfg: VmWdtConfig,
650 dump_device_tree_blob: Option<PathBuf>,
651 vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>,
652 dynamic_power_coefficient: BTreeMap<usize, u32>,
653 device_tree_overlays: Vec<DtbOverlay>,
654 serial_devices: &[SerialDeviceInfo],
655 ) -> Result<()> {
656 let mut fdt = Fdt::new(&[]);
657 let mut phandles_key_cache = Vec::new();
658 let mut phandles = BTreeMap::new();
659
660 // The whole thing is put into one giant node with some top level properties
661 let root_node = fdt.root_mut();
662 root_node.set_prop("interrupt-parent", PHANDLE_GIC)?;
663 phandles.insert("intc", PHANDLE_GIC);
664 root_node.set_prop("compatible", "linux,dummy-virt")?;
665 root_node.set_prop("#address-cells", 0x2u32)?;
666 root_node.set_prop("#size-cells", 0x2u32)?;
667 if let Some(android_fstab) = android_fstab {
668 arch::android::create_android_fdt(&mut fdt, android_fstab)?;
669 }
670 let stdout_path = serial_devices
671 .first()
672 .map(|first_serial| format!("/U6_16550A@{:x}", first_serial.address));
673 create_chosen_node(&mut fdt, cmdline, initrd, stdout_path.as_deref())?;
674 create_config_node(&mut fdt, image)?;
675 create_memory_node(&mut fdt, guest_mem)?;
676 let dma_pool_phandle = match swiotlb {
677 Some(x) => {
678 let phandle = create_resv_memory_node(&mut fdt, x)?;
679 phandles.insert("restricted_dma_reserved", phandle);
680 Some(phandle)
681 }
682 None => None,
683 };
684 create_cpu_nodes(
685 &mut fdt,
686 num_cpus,
687 cpu_mpidr_generator,
688 cpu_clusters,
689 cpu_capacity,
690 dynamic_power_coefficient,
691 cpu_frequencies.clone(),
692 )?;
693 create_gic_node(&mut fdt, is_gicv3, num_cpus as u64)?;
694 create_timer_node(&mut fdt, num_cpus)?;
695 if use_pmu {
696 create_pmu_node(&mut fdt, num_cpus)?;
697 }
698 create_serial_nodes(&mut fdt, serial_devices)?;
699 create_psci_node(&mut fdt, &psci_version)?;
700 create_pci_nodes(&mut fdt, pci_irqs, pci_cfg, pci_ranges, dma_pool_phandle)?;
701 create_rtc_node(&mut fdt)?;
702 if let Some((bat_mmio_base, bat_irq)) = bat_mmio_base_and_irq {
703 create_battery_node(&mut fdt, bat_mmio_base, bat_irq)?;
704 }
705 create_vmwdt_node(&mut fdt, vmwdt_cfg)?;
706 create_kvm_cpufreq_node(&mut fdt)?;
707 vm_generator(&mut fdt, &phandles)?;
708 if !cpu_frequencies.is_empty() {
709 create_virt_cpufreq_node(&mut fdt, num_cpus as u64)?;
710 }
711
712 let pviommu_ids = get_pkvm_pviommu_ids(&platform_dev_resources)?;
713
714 let cache_offset = phandles_key_cache.len();
715 // Hack to extend the lifetime of the Strings as keys of phandles (i.e. &str).
716 phandles_key_cache.extend(pviommu_ids.iter().map(|id| format!("pviommu{id}")));
717 let pviommu_phandle_keys = &phandles_key_cache[cache_offset..];
718
719 for (index, (id, key)) in pviommu_ids.iter().zip(pviommu_phandle_keys).enumerate() {
720 let phandle = create_pkvm_pviommu_node(&mut fdt, index, *id)?;
721 phandles.insert(key, phandle);
722 }
723
724 // Done writing base FDT, now apply DT overlays
725 apply_device_tree_overlays(
726 &mut fdt,
727 device_tree_overlays,
728 #[cfg(any(target_os = "android", target_os = "linux"))]
729 platform_dev_resources,
730 #[cfg(any(target_os = "android", target_os = "linux"))]
731 &phandles,
732 )?;
733
734 let fdt_final = fdt.finish()?;
735
736 if let Some(file_path) = dump_device_tree_blob {
737 std::fs::write(&file_path, &fdt_final)
738 .map_err(|e| Error::FdtDumpIoError(e, file_path.clone()))?;
739 }
740
741 if fdt_final.len() > fdt_max_size {
742 return Err(Error::TotalSizeTooLarge);
743 }
744
745 let written = guest_mem
746 .write_at_addr(fdt_final.as_slice(), fdt_address)
747 .map_err(|_| Error::FdtGuestMemoryWriteError)?;
748 if written < fdt_final.len() {
749 return Err(Error::FdtGuestMemoryWriteError);
750 }
751
752 Ok(())
753 }
754
#[cfg(test)]
mod tests {
    use super::*;

    // PSCI v0.1 predates the versioned strings and gets the legacy string only.
    #[test]
    fn psci_compatible_v0_1() {
        let version = PsciVersion::new(0, 1).unwrap();
        assert_eq!(psci_compatible(&version), ["arm,psci"]);
    }

    #[test]
    fn psci_compatible_v0_2() {
        let version = PsciVersion::new(0, 2).unwrap();
        assert_eq!(psci_compatible(&version), ["arm,psci-0.2"]);
    }

    #[test]
    fn psci_compatible_v0_5() {
        // Only the 0.2 version supported by the kernel should be added.
        let version = PsciVersion::new(0, 5).unwrap();
        assert_eq!(psci_compatible(&version), ["arm,psci-0.2"]);
    }

    #[test]
    fn psci_compatible_v1_0() {
        // Both 1.0 and 0.2 should be listed, in that order.
        let version = PsciVersion::new(1, 0).unwrap();
        assert_eq!(psci_compatible(&version), ["arm,psci-1.0", "arm,psci-0.2"]);
    }

    #[test]
    fn psci_compatible_v1_5() {
        // Only the 1.0 and 0.2 versions supported by the kernel should be listed.
        let version = PsciVersion::new(1, 5).unwrap();
        assert_eq!(psci_compatible(&version), ["arm,psci-1.0", "arm,psci-0.2"]);
    }

    #[test]
    fn symbols_entries() {
        const TEST_SYMBOL: &str = "dev";
        const TEST_PATH: &str = "/dev";

        let mut fdt = Fdt::new(&[]);

        // The target node does not exist yet.
        assert!(
            add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).is_err(),
            "missing node"
        );

        // The node exists but has no phandle.
        fdt.root_mut().subnode_mut(TEST_SYMBOL).unwrap();
        assert!(
            add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).is_err(),
            "missing phandle"
        );

        // With a phandle in place the entry is accepted and recorded.
        fdt.get_node_mut(TEST_PATH)
            .unwrap()
            .set_prop("phandle", 1u32)
            .unwrap();
        assert!(
            add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).is_ok(),
            "valid path"
        );

        let symbols_node = fdt.get_node("/__symbols__").unwrap();
        let recorded_path = symbols_node.get_prop::<String>(TEST_SYMBOL).unwrap();
        assert_eq!(recorded_path, TEST_PATH);
    }
}
821