• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::BTreeMap;
6 use std::collections::HashSet;
7 use std::fs::File;
8 use std::path::PathBuf;
9 
10 use arch::apply_device_tree_overlays;
11 use arch::serial::SerialDeviceInfo;
12 use arch::CpuSet;
13 use arch::DtbOverlay;
14 #[cfg(any(target_os = "android", target_os = "linux"))]
15 use arch::PlatformBusResources;
16 use cros_fdt::Error;
17 use cros_fdt::Fdt;
18 use cros_fdt::Result;
19 // This is a Battery related constant
20 use devices::bat::GOLDFISHBAT_MMIO_LEN;
21 use devices::pl030::PL030_AMBA_ID;
22 use devices::IommuDevType;
23 use devices::PciAddress;
24 use devices::PciInterruptPin;
25 use hypervisor::PsciVersion;
26 use hypervisor::VmAArch64;
27 use hypervisor::PSCI_0_2;
28 use hypervisor::PSCI_1_0;
29 use rand::rngs::OsRng;
30 use rand::RngCore;
31 use vm_memory::GuestAddress;
32 use vm_memory::GuestMemory;
33 
34 // These are GIC address-space location constants.
35 use crate::AARCH64_GIC_CPUI_BASE;
36 use crate::AARCH64_GIC_CPUI_SIZE;
37 use crate::AARCH64_GIC_DIST_BASE;
38 use crate::AARCH64_GIC_DIST_SIZE;
39 use crate::AARCH64_GIC_REDIST_SIZE;
40 use crate::AARCH64_PMU_IRQ;
41 use crate::AARCH64_PROTECTED_VM_FW_START;
42 // These are RTC related constants
43 use crate::AARCH64_RTC_ADDR;
44 use crate::AARCH64_RTC_IRQ;
45 use crate::AARCH64_RTC_SIZE;
46 // These are serial device related constants.
47 use crate::AARCH64_SERIAL_SPEED;
48 use crate::AARCH64_VIRTFREQ_BASE;
49 use crate::AARCH64_VIRTFREQ_SIZE;
50 
// Fixed phandle values for singleton nodes. The GIC value is arbitrary; a more complex
// interrupt architecture would call for an enum instead of loose constants.
const PHANDLE_GIC: u32 = 1;
const PHANDLE_RESTRICTED_DMA_POOL: u32 = 2;

// Base of the phandle range handed out to CPU nodes (base + CPU index).
const PHANDLE_CPU0: u32 = 0x100;

// Base of the phandle range handed out to per-CPU OPP tables (base + CPU index).
const PHANDLE_OPP_DOMAIN_BASE: u32 = 0x1000;

// Base of the phandle range handed out to pKVM pvIOMMU nodes (base + node index).
const PHANDLE_PKVM_PVIOMMU: u32 = 0x2000;

// Interrupt specifier layout and flag values, as specified by the Linux GIC bindings.
const GIC_FDT_IRQ_NUM_CELLS: u32 = 3;
const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;
const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8;
const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << GIC_FDT_IRQ_PPI_CPU_SHIFT;
const IRQ_TYPE_EDGE_RISING: u32 = 0x1;
const IRQ_TYPE_LEVEL_HIGH: u32 = 0x4;
const IRQ_TYPE_LEVEL_LOW: u32 = 0x8;
74 
create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()>75 fn create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()> {
76     let mut mem_reg_prop = Vec::new();
77     let mut previous_memory_region_end = None;
78     let mut regions = guest_mem.guest_memory_regions();
79     regions.sort();
80     for region in regions {
81         if region.0.offset() == AARCH64_PROTECTED_VM_FW_START {
82             continue;
83         }
84         // Merge with the previous region if possible.
85         if let Some(previous_end) = previous_memory_region_end {
86             if region.0 == previous_end {
87                 *mem_reg_prop.last_mut().unwrap() += region.1 as u64;
88                 previous_memory_region_end =
89                     Some(previous_end.checked_add(region.1 as u64).unwrap());
90                 continue;
91             }
92             assert!(region.0 > previous_end, "Memory regions overlap");
93         }
94 
95         mem_reg_prop.push(region.0.offset());
96         mem_reg_prop.push(region.1 as u64);
97         previous_memory_region_end = Some(region.0.checked_add(region.1 as u64).unwrap());
98     }
99 
100     let memory_node = fdt.root_mut().subnode_mut("memory")?;
101     memory_node.set_prop("device_type", "memory")?;
102     memory_node.set_prop("reg", mem_reg_prop)?;
103     Ok(())
104 }
105 
create_resv_memory_node( fdt: &mut Fdt, resv_addr_and_size: (Option<GuestAddress>, u64), ) -> Result<u32>106 fn create_resv_memory_node(
107     fdt: &mut Fdt,
108     resv_addr_and_size: (Option<GuestAddress>, u64),
109 ) -> Result<u32> {
110     let (resv_addr, resv_size) = resv_addr_and_size;
111 
112     let resv_memory_node = fdt.root_mut().subnode_mut("reserved-memory")?;
113     resv_memory_node.set_prop("#address-cells", 0x2u32)?;
114     resv_memory_node.set_prop("#size-cells", 0x2u32)?;
115     resv_memory_node.set_prop("ranges", ())?;
116 
117     let restricted_dma_pool_node = if let Some(resv_addr) = resv_addr {
118         let node =
119             resv_memory_node.subnode_mut(&format!("restricted_dma_reserved@{:x}", resv_addr.0))?;
120         node.set_prop("reg", &[resv_addr.0, resv_size])?;
121         node
122     } else {
123         let node = resv_memory_node.subnode_mut("restricted_dma_reserved")?;
124         node.set_prop("size", resv_size)?;
125         node
126     };
127     restricted_dma_pool_node.set_prop("phandle", PHANDLE_RESTRICTED_DMA_POOL)?;
128     restricted_dma_pool_node.set_prop("compatible", "restricted-dma-pool")?;
129     restricted_dma_pool_node.set_prop("alignment", base::pagesize() as u64)?;
130     Ok(PHANDLE_RESTRICTED_DMA_POOL)
131 }
132 
create_cpu_nodes( fdt: &mut Fdt, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, dynamic_power_coefficient: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, ) -> Result<()>133 fn create_cpu_nodes(
134     fdt: &mut Fdt,
135     num_cpus: u32,
136     cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
137     cpu_clusters: Vec<CpuSet>,
138     cpu_capacity: BTreeMap<usize, u32>,
139     dynamic_power_coefficient: BTreeMap<usize, u32>,
140     cpu_frequencies: BTreeMap<usize, Vec<u32>>,
141 ) -> Result<()> {
142     let root_node = fdt.root_mut();
143     let cpus_node = root_node.subnode_mut("cpus")?;
144     cpus_node.set_prop("#address-cells", 0x1u32)?;
145     cpus_node.set_prop("#size-cells", 0x0u32)?;
146 
147     for cpu_id in 0..num_cpus {
148         let reg = u32::try_from(
149             cpu_mpidr_generator(cpu_id.try_into().unwrap()).ok_or(Error::PropertyValueInvalid)?,
150         )
151         .map_err(|_| Error::PropertyValueTooLarge)?;
152         let cpu_name = format!("cpu@{:x}", reg);
153         let cpu_node = cpus_node.subnode_mut(&cpu_name)?;
154         cpu_node.set_prop("device_type", "cpu")?;
155         cpu_node.set_prop("compatible", "arm,armv8")?;
156         if num_cpus > 1 {
157             cpu_node.set_prop("enable-method", "psci")?;
158         }
159         cpu_node.set_prop("reg", reg)?;
160         cpu_node.set_prop("phandle", PHANDLE_CPU0 + cpu_id)?;
161 
162         if let Some(pwr_coefficient) = dynamic_power_coefficient.get(&(cpu_id as usize)) {
163             cpu_node.set_prop("dynamic-power-coefficient", *pwr_coefficient)?;
164         }
165         if let Some(capacity) = cpu_capacity.get(&(cpu_id as usize)) {
166             cpu_node.set_prop("capacity-dmips-mhz", *capacity)?;
167         }
168         // Placed inside cpu nodes for ease of parsing for some secure firmwares(PvmFw).
169         if let Some(frequencies) = cpu_frequencies.get(&(cpu_id as usize)) {
170             cpu_node.set_prop("operating-points-v2", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
171             let opp_table_node = cpu_node.subnode_mut(&format!("opp_table{}", cpu_id))?;
172             opp_table_node.set_prop("phandle", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
173             opp_table_node.set_prop("compatible", "operating-points-v2")?;
174             for freq in frequencies.iter() {
175                 let opp_hz = (*freq) as u64 * 1000;
176                 let opp_node = opp_table_node.subnode_mut(&format!("opp{}", opp_hz))?;
177                 opp_node.set_prop("opp-hz", opp_hz)?;
178             }
179         }
180     }
181 
182     if !cpu_clusters.is_empty() {
183         let cpu_map_node = cpus_node.subnode_mut("cpu-map")?;
184         for (cluster_idx, cpus) in cpu_clusters.iter().enumerate() {
185             let cluster_node = cpu_map_node.subnode_mut(&format!("cluster{}", cluster_idx))?;
186             for (core_idx, cpu_id) in cpus.iter().enumerate() {
187                 let core_node = cluster_node.subnode_mut(&format!("core{}", core_idx))?;
188                 core_node.set_prop("cpu", PHANDLE_CPU0 + *cpu_id as u32)?;
189             }
190         }
191     }
192 
193     Ok(())
194 }
195 
create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()>196 fn create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()> {
197     let mut gic_reg_prop = [AARCH64_GIC_DIST_BASE, AARCH64_GIC_DIST_SIZE, 0, 0];
198 
199     let intc_node = fdt.root_mut().subnode_mut("intc")?;
200     if is_gicv3 {
201         intc_node.set_prop("compatible", "arm,gic-v3")?;
202         gic_reg_prop[2] = AARCH64_GIC_DIST_BASE - (AARCH64_GIC_REDIST_SIZE * num_cpus);
203         gic_reg_prop[3] = AARCH64_GIC_REDIST_SIZE * num_cpus;
204     } else {
205         intc_node.set_prop("compatible", "arm,cortex-a15-gic")?;
206         gic_reg_prop[2] = AARCH64_GIC_CPUI_BASE;
207         gic_reg_prop[3] = AARCH64_GIC_CPUI_SIZE;
208     }
209     intc_node.set_prop("#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS)?;
210     intc_node.set_prop("interrupt-controller", ())?;
211     intc_node.set_prop("reg", &gic_reg_prop)?;
212     intc_node.set_prop("phandle", PHANDLE_GIC)?;
213     intc_node.set_prop("#address-cells", 2u32)?;
214     intc_node.set_prop("#size-cells", 2u32)?;
215     add_symbols_entry(fdt, "intc", "/intc")?;
216     Ok(())
217 }
218 
create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>219 fn create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
220     // These are fixed interrupt numbers for the timer device.
221     let irqs = [13, 14, 11, 10];
222     let compatible = "arm,armv8-timer";
223     let cpu_mask: u32 =
224         (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
225 
226     let mut timer_reg_cells = Vec::new();
227     for &irq in &irqs {
228         timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI);
229         timer_reg_cells.push(irq);
230         timer_reg_cells.push(cpu_mask | IRQ_TYPE_LEVEL_LOW);
231     }
232 
233     let timer_node = fdt.root_mut().subnode_mut("timer")?;
234     timer_node.set_prop("compatible", compatible)?;
235     timer_node.set_prop("interrupts", timer_reg_cells)?;
236     timer_node.set_prop("always-on", ())?;
237     Ok(())
238 }
239 
create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()>240 fn create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()> {
241     let compatible = "virtual,android-v-only-cpufreq";
242     let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
243     let reg = [AARCH64_VIRTFREQ_BASE, AARCH64_VIRTFREQ_SIZE * num_cpus];
244 
245     vcf_node.set_prop("compatible", compatible)?;
246     vcf_node.set_prop("reg", &reg)?;
247     Ok(())
248 }
249 
create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>250 fn create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
251     let compatible = "arm,armv8-pmuv3";
252     let cpu_mask: u32 =
253         (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
254     let irq = [
255         GIC_FDT_IRQ_TYPE_PPI,
256         AARCH64_PMU_IRQ,
257         cpu_mask | IRQ_TYPE_LEVEL_HIGH,
258     ];
259 
260     let pmu_node = fdt.root_mut().subnode_mut("pmu")?;
261     pmu_node.set_prop("compatible", compatible)?;
262     pmu_node.set_prop("interrupts", &irq)?;
263     Ok(())
264 }
265 
create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()>266 fn create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()> {
267     let serial_reg_prop = [addr, size];
268     let irq = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_EDGE_RISING];
269 
270     let serial_node = fdt
271         .root_mut()
272         .subnode_mut(&format!("U6_16550A@{:x}", addr))?;
273     serial_node.set_prop("compatible", "ns16550a")?;
274     serial_node.set_prop("reg", &serial_reg_prop)?;
275     serial_node.set_prop("clock-frequency", AARCH64_SERIAL_SPEED)?;
276     serial_node.set_prop("interrupts", &irq)?;
277 
278     Ok(())
279 }
280 
create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()>281 fn create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()> {
282     for dev in serial_devices {
283         create_serial_node(fdt, dev.address, dev.size, dev.irq)?;
284     }
285 
286     Ok(())
287 }
288 
psci_compatible(version: &PsciVersion) -> Vec<&str>289 fn psci_compatible(version: &PsciVersion) -> Vec<&str> {
290     // The PSCI kernel driver only supports compatible strings for the following
291     // backward-compatible versions.
292     let supported = [(PSCI_1_0, "arm,psci-1.0"), (PSCI_0_2, "arm,psci-0.2")];
293 
294     let mut compatible: Vec<_> = supported
295         .iter()
296         .filter(|&(v, _)| *version >= *v)
297         .map(|&(_, c)| c)
298         .collect();
299 
300     // The PSCI kernel driver also supports PSCI v0.1, which is NOT forward-compatible.
301     if compatible.is_empty() {
302         compatible = vec!["arm,psci"];
303     }
304 
305     compatible
306 }
307 
create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()>308 fn create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()> {
309     let compatible = psci_compatible(version);
310     let psci_node = fdt.root_mut().subnode_mut("psci")?;
311     psci_node.set_prop("compatible", compatible.as_slice())?;
312     // Only support aarch64 guest
313     psci_node.set_prop("method", "hvc")?;
314     Ok(())
315 }
316 
create_chosen_node( fdt: &mut Fdt, cmdline: &str, initrd: Option<(GuestAddress, usize)>, stdout_path: Option<&str>, ) -> Result<()>317 fn create_chosen_node(
318     fdt: &mut Fdt,
319     cmdline: &str,
320     initrd: Option<(GuestAddress, usize)>,
321     stdout_path: Option<&str>,
322 ) -> Result<()> {
323     let chosen_node = fdt.root_mut().subnode_mut("chosen")?;
324     chosen_node.set_prop("linux,pci-probe-only", 1u32)?;
325     chosen_node.set_prop("bootargs", cmdline)?;
326     if let Some(stdout_path) = stdout_path {
327         // Used by android bootloader for boot console output
328         chosen_node.set_prop("stdout-path", stdout_path)?;
329     }
330 
331     let mut kaslr_seed_bytes = [0u8; 8];
332     OsRng.fill_bytes(&mut kaslr_seed_bytes);
333     let kaslr_seed = u64::from_le_bytes(kaslr_seed_bytes);
334     chosen_node.set_prop("kaslr-seed", kaslr_seed)?;
335 
336     let mut rng_seed_bytes = [0u8; 256];
337     OsRng.fill_bytes(&mut rng_seed_bytes);
338     chosen_node.set_prop("rng-seed", &rng_seed_bytes)?;
339 
340     if let Some((initrd_addr, initrd_size)) = initrd {
341         let initrd_start = initrd_addr.offset() as u32;
342         let initrd_end = initrd_start + initrd_size as u32;
343         chosen_node.set_prop("linux,initrd-start", initrd_start)?;
344         chosen_node.set_prop("linux,initrd-end", initrd_end)?;
345     }
346 
347     Ok(())
348 }
349 
create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()>350 fn create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()> {
351     let addr: u32 = addr
352         .offset()
353         .try_into()
354         .map_err(|_| Error::PropertyValueTooLarge)?;
355     let size: u32 = size.try_into().map_err(|_| Error::PropertyValueTooLarge)?;
356 
357     let config_node = fdt.root_mut().subnode_mut("config")?;
358     config_node.set_prop("kernel-address", addr)?;
359     config_node.set_prop("kernel-size", size)?;
360     Ok(())
361 }
362 
create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()>363 fn create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()> {
364     let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
365     vcf_node.set_prop("compatible", "virtual,kvm-cpufreq")?;
366     Ok(())
367 }
368 
369 #[cfg(any(target_os = "android", target_os = "linux"))]
get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>>370 fn get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>> {
371     let mut ids = HashSet::new();
372 
373     for res in platform_dev_resources {
374         for iommu in &res.iommus {
375             if let (IommuDevType::PkvmPviommu, Some(id), _) = iommu {
376                 ids.insert(*id);
377             }
378         }
379     }
380 
381     Ok(Vec::from_iter(ids))
382 }
383 
create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32>384 fn create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32> {
385     let name = format!("pviommu{index}");
386     let phandle = PHANDLE_PKVM_PVIOMMU
387         .checked_add(index.try_into().unwrap())
388         .unwrap();
389 
390     let iommu_node = fdt.root_mut().subnode_mut(&name)?;
391     iommu_node.set_prop("phandle", phandle)?;
392     iommu_node.set_prop("#iommu-cells", 1u32)?;
393     iommu_node.set_prop("compatible", "pkvm,pviommu")?;
394     iommu_node.set_prop("id", id)?;
395 
396     Ok(phandle)
397 }
398 
/// PCI host controller address range.
///
/// This represents a single entry in the "ranges" property for a PCI host controller.
///
/// See [PCI Bus Binding to Open Firmware](https://www.openfirmware.info/data/docs/bus.pci.pdf)
/// and <https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt>
/// for more information.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct PciRange {
    /// Address space this range belongs to.
    pub space: PciAddressSpace,
    /// Start of the range as seen on the PCI bus.
    pub bus_address: u64,
    /// Start of the range as seen by the CPU.
    pub cpu_physical_address: u64,
    /// Length of the range in bytes.
    pub size: u64,
    /// Whether the range is flagged as prefetchable.
    pub prefetchable: bool,
}

/// PCI address space.
///
/// The discriminants are the space codes placed in the first cell of a "ranges" entry.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[allow(dead_code)]
pub enum PciAddressSpace {
    /// PCI configuration space
    Configuration = 0b00,
    /// I/O space
    Io = 0b01,
    /// 32-bit memory space
    Memory = 0b10,
    /// 64-bit memory space
    Memory64 = 0b11,
}
428 
/// Location of memory-mapped PCI configuration space.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct PciConfigRegion {
    /// Physical address of the base of the memory-mapped PCI configuration region.
    pub base: u64,
    /// Size of the PCI configuration region in bytes.
    pub size: u64,
}
437 
/// Location and timing parameters of the memory-mapped VM watchdog.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct VmWdtConfig {
    /// Physical address of the base of the memory-mapped vm watchdog region.
    pub base: u64,
    /// Size of the vm watchdog region in bytes.
    pub size: u64,
    /// The internal clock frequency of the watchdog.
    pub clock_hz: u32,
    /// The expiration timeout measured in seconds.
    pub timeout_sec: u32,
}
450 
create_pci_nodes( fdt: &mut Fdt, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, cfg: PciConfigRegion, ranges: &[PciRange], dma_pool_phandle: Option<u32>, ) -> Result<()>451 fn create_pci_nodes(
452     fdt: &mut Fdt,
453     pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
454     cfg: PciConfigRegion,
455     ranges: &[PciRange],
456     dma_pool_phandle: Option<u32>,
457 ) -> Result<()> {
458     // Add devicetree nodes describing a PCI generic host controller.
459     // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
460     // and "PCI Bus Binding to IEEE Std 1275-1994".
461     let ranges: Vec<u32> = ranges
462         .iter()
463         .flat_map(|r| {
464             let ss = r.space as u32;
465             let p = r.prefetchable as u32;
466             [
467                 // BUS_ADDRESS(3) encoded as defined in OF PCI Bus Binding
468                 (ss << 24) | (p << 30),
469                 (r.bus_address >> 32) as u32,
470                 r.bus_address as u32,
471                 // CPU_PHYSICAL(2)
472                 (r.cpu_physical_address >> 32) as u32,
473                 r.cpu_physical_address as u32,
474                 // SIZE(2)
475                 (r.size >> 32) as u32,
476                 r.size as u32,
477             ]
478         })
479         .collect();
480 
481     let bus_range = [0u32, 0u32]; // Only bus 0
482     let reg = [cfg.base, cfg.size];
483 
484     let mut interrupts: Vec<u32> = Vec::new();
485     let mut masks: Vec<u32> = Vec::new();
486 
487     for (address, irq_num, irq_pin) in pci_irqs.iter() {
488         // PCI_DEVICE(3)
489         interrupts.push(address.to_config_address(0, 8));
490         interrupts.push(0);
491         interrupts.push(0);
492 
493         // INT#(1)
494         interrupts.push(irq_pin.to_mask() + 1);
495 
496         // CONTROLLER(PHANDLE)
497         interrupts.push(PHANDLE_GIC);
498         interrupts.push(0);
499         interrupts.push(0);
500 
501         // CONTROLLER_DATA(3)
502         interrupts.push(GIC_FDT_IRQ_TYPE_SPI);
503         interrupts.push(*irq_num);
504         interrupts.push(IRQ_TYPE_LEVEL_HIGH);
505 
506         // PCI_DEVICE(3)
507         masks.push(0xf800); // bits 11..15 (device)
508         masks.push(0);
509         masks.push(0);
510 
511         // INT#(1)
512         masks.push(0x7); // allow INTA#-INTD# (1 | 2 | 3 | 4)
513     }
514 
515     let pci_node = fdt.root_mut().subnode_mut("pci")?;
516     pci_node.set_prop("compatible", "pci-host-cam-generic")?;
517     pci_node.set_prop("device_type", "pci")?;
518     pci_node.set_prop("ranges", ranges)?;
519     pci_node.set_prop("bus-range", &bus_range)?;
520     pci_node.set_prop("#address-cells", 3u32)?;
521     pci_node.set_prop("#size-cells", 2u32)?;
522     pci_node.set_prop("reg", &reg)?;
523     pci_node.set_prop("#interrupt-cells", 1u32)?;
524     pci_node.set_prop("interrupt-map", interrupts)?;
525     pci_node.set_prop("interrupt-map-mask", masks)?;
526     pci_node.set_prop("dma-coherent", ())?;
527     if let Some(dma_pool_phandle) = dma_pool_phandle {
528         pci_node.set_prop("memory-region", dma_pool_phandle)?;
529     }
530     Ok(())
531 }
532 
create_rtc_node(fdt: &mut Fdt) -> Result<()>533 fn create_rtc_node(fdt: &mut Fdt) -> Result<()> {
534     // the kernel driver for pl030 really really wants a clock node
535     // associated with an AMBA device or it will fail to probe, so we
536     // need to make up a clock node to associate with the pl030 rtc
537     // node and an associated handle with a unique phandle value.
538     const CLK_PHANDLE: u32 = 24;
539     let clock_node = fdt.root_mut().subnode_mut("pclk@3M")?;
540     clock_node.set_prop("#clock-cells", 0u32)?;
541     clock_node.set_prop("compatible", "fixed-clock")?;
542     clock_node.set_prop("clock-frequency", 3141592u32)?;
543     clock_node.set_prop("phandle", CLK_PHANDLE)?;
544 
545     let rtc_name = format!("rtc@{:x}", AARCH64_RTC_ADDR);
546     let reg = [AARCH64_RTC_ADDR, AARCH64_RTC_SIZE];
547     let irq = [GIC_FDT_IRQ_TYPE_SPI, AARCH64_RTC_IRQ, IRQ_TYPE_LEVEL_HIGH];
548 
549     let rtc_node = fdt.root_mut().subnode_mut(&rtc_name)?;
550     rtc_node.set_prop("compatible", "arm,primecell")?;
551     rtc_node.set_prop("arm,primecell-periphid", PL030_AMBA_ID)?;
552     rtc_node.set_prop("reg", &reg)?;
553     rtc_node.set_prop("interrupts", &irq)?;
554     rtc_node.set_prop("clocks", CLK_PHANDLE)?;
555     rtc_node.set_prop("clock-names", "apb_pclk")?;
556     Ok(())
557 }
558 
559 /// Create a flattened device tree node for Goldfish Battery device.
560 ///
561 /// # Arguments
562 ///
563 /// * `fdt` - An Fdt in which the node is created
564 /// * `mmio_base` - The MMIO base address of the battery
565 /// * `irq` - The IRQ number of the battery
create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()>566 fn create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()> {
567     let reg = [mmio_base, GOLDFISHBAT_MMIO_LEN];
568     let irqs = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_LEVEL_HIGH];
569     let bat_node = fdt.root_mut().subnode_mut("goldfish_battery")?;
570     bat_node.set_prop("compatible", "google,goldfish-battery")?;
571     bat_node.set_prop("reg", &reg)?;
572     bat_node.set_prop("interrupts", &irqs)?;
573     Ok(())
574 }
575 
create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig) -> Result<()>576 fn create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig) -> Result<()> {
577     let vmwdt_name = format!("vmwdt@{:x}", vmwdt_cfg.base);
578     let reg = [vmwdt_cfg.base, vmwdt_cfg.size];
579     let vmwdt_node = fdt.root_mut().subnode_mut(&vmwdt_name)?;
580     vmwdt_node.set_prop("compatible", "qemu,vcpu-stall-detector")?;
581     vmwdt_node.set_prop("reg", &reg)?;
582     vmwdt_node.set_prop("clock-frequency", vmwdt_cfg.clock_hz)?;
583     vmwdt_node.set_prop("timeout-sec", vmwdt_cfg.timeout_sec)?;
584     Ok(())
585 }
586 
587 // Add a node path to __symbols__ node of the FDT, so it can be referenced by an overlay.
add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()>588 fn add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()> {
589     // Ensure the path points to a valid node with a defined phandle
590     let Some(target_node) = fdt.get_node(path) else {
591         return Err(Error::InvalidPath(format!("{path} does not exist")));
592     };
593     target_node
594         .get_prop::<u32>("phandle")
595         .or_else(|| target_node.get_prop("linux,phandle"))
596         .ok_or_else(|| Error::InvalidPath(format!("{path} must have a phandle")))?;
597     // Add the label -> path mapping.
598     let symbols_node = fdt.root_mut().subnode_mut("__symbols__")?;
599     symbols_node.set_prop(symbol, path)?;
600     Ok(())
601 }
602 
603 /// Creates a flattened device tree containing all of the parameters for the
604 /// kernel and loads it into the guest memory at the specified offset.
605 ///
606 /// # Arguments
607 ///
608 /// * `fdt_max_size` - The amount of space reserved for the device tree
609 /// * `guest_mem` - The guest memory object
610 /// * `pci_irqs` - List of PCI device address to PCI interrupt number and pin mappings
611 /// * `pci_cfg` - Location of the memory-mapped PCI configuration space.
612 /// * `pci_ranges` - Memory ranges accessible via the PCI host controller.
613 /// * `num_cpus` - Number of virtual CPUs the guest will have
614 /// * `fdt_address` - The offset into physical memory for the device tree
615 /// * `cmdline` - The kernel commandline
616 /// * `initrd` - An optional tuple of initrd guest physical address and size
617 /// * `android_fstab` - An optional file holding Android fstab entries
618 /// * `is_gicv3` - True if gicv3, false if v2
619 /// * `psci_version` - the current PSCI version
620 /// * `swiotlb` - Reserve a memory pool for DMA. Tuple of base address and size.
621 /// * `bat_mmio_base_and_irq` - The battery base address and irq number
622 /// * `vmwdt_cfg` - The virtual watchdog configuration
623 /// * `dump_device_tree_blob` - Option path to write DTB to
624 /// * `vm_generator` - Callback to add additional nodes to DTB. create_vm uses Aarch64Vm::create_fdt
create_fdt( fdt_max_size: usize, guest_mem: &GuestMemory, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, pci_cfg: PciConfigRegion, pci_ranges: &[PciRange], #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec< PlatformBusResources, >, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, fdt_address: GuestAddress, cmdline: &str, image: (GuestAddress, usize), initrd: Option<(GuestAddress, usize)>, android_fstab: Option<File>, is_gicv3: bool, use_pmu: bool, psci_version: PsciVersion, swiotlb: Option<(Option<GuestAddress>, u64)>, bat_mmio_base_and_irq: Option<(u64, u32)>, vmwdt_cfg: VmWdtConfig, dump_device_tree_blob: Option<PathBuf>, vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>, dynamic_power_coefficient: BTreeMap<usize, u32>, device_tree_overlays: Vec<DtbOverlay>, serial_devices: &[SerialDeviceInfo], ) -> Result<()>625 pub fn create_fdt(
626     fdt_max_size: usize,
627     guest_mem: &GuestMemory,
628     pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
629     pci_cfg: PciConfigRegion,
630     pci_ranges: &[PciRange],
631     #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec<
632         PlatformBusResources,
633     >,
634     num_cpus: u32,
635     cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
636     cpu_clusters: Vec<CpuSet>,
637     cpu_capacity: BTreeMap<usize, u32>,
638     cpu_frequencies: BTreeMap<usize, Vec<u32>>,
639     fdt_address: GuestAddress,
640     cmdline: &str,
641     image: (GuestAddress, usize),
642     initrd: Option<(GuestAddress, usize)>,
643     android_fstab: Option<File>,
644     is_gicv3: bool,
645     use_pmu: bool,
646     psci_version: PsciVersion,
647     swiotlb: Option<(Option<GuestAddress>, u64)>,
648     bat_mmio_base_and_irq: Option<(u64, u32)>,
649     vmwdt_cfg: VmWdtConfig,
650     dump_device_tree_blob: Option<PathBuf>,
651     vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>,
652     dynamic_power_coefficient: BTreeMap<usize, u32>,
653     device_tree_overlays: Vec<DtbOverlay>,
654     serial_devices: &[SerialDeviceInfo],
655 ) -> Result<()> {
656     let mut fdt = Fdt::new(&[]);
657     let mut phandles_key_cache = Vec::new();
658     let mut phandles = BTreeMap::new();
659 
660     // The whole thing is put into one giant node with some top level properties
661     let root_node = fdt.root_mut();
662     root_node.set_prop("interrupt-parent", PHANDLE_GIC)?;
663     phandles.insert("intc", PHANDLE_GIC);
664     root_node.set_prop("compatible", "linux,dummy-virt")?;
665     root_node.set_prop("#address-cells", 0x2u32)?;
666     root_node.set_prop("#size-cells", 0x2u32)?;
667     if let Some(android_fstab) = android_fstab {
668         arch::android::create_android_fdt(&mut fdt, android_fstab)?;
669     }
670     let stdout_path = serial_devices
671         .first()
672         .map(|first_serial| format!("/U6_16550A@{:x}", first_serial.address));
673     create_chosen_node(&mut fdt, cmdline, initrd, stdout_path.as_deref())?;
674     create_config_node(&mut fdt, image)?;
675     create_memory_node(&mut fdt, guest_mem)?;
676     let dma_pool_phandle = match swiotlb {
677         Some(x) => {
678             let phandle = create_resv_memory_node(&mut fdt, x)?;
679             phandles.insert("restricted_dma_reserved", phandle);
680             Some(phandle)
681         }
682         None => None,
683     };
684     create_cpu_nodes(
685         &mut fdt,
686         num_cpus,
687         cpu_mpidr_generator,
688         cpu_clusters,
689         cpu_capacity,
690         dynamic_power_coefficient,
691         cpu_frequencies.clone(),
692     )?;
693     create_gic_node(&mut fdt, is_gicv3, num_cpus as u64)?;
694     create_timer_node(&mut fdt, num_cpus)?;
695     if use_pmu {
696         create_pmu_node(&mut fdt, num_cpus)?;
697     }
698     create_serial_nodes(&mut fdt, serial_devices)?;
699     create_psci_node(&mut fdt, &psci_version)?;
700     create_pci_nodes(&mut fdt, pci_irqs, pci_cfg, pci_ranges, dma_pool_phandle)?;
701     create_rtc_node(&mut fdt)?;
702     if let Some((bat_mmio_base, bat_irq)) = bat_mmio_base_and_irq {
703         create_battery_node(&mut fdt, bat_mmio_base, bat_irq)?;
704     }
705     create_vmwdt_node(&mut fdt, vmwdt_cfg)?;
706     create_kvm_cpufreq_node(&mut fdt)?;
707     vm_generator(&mut fdt, &phandles)?;
708     if !cpu_frequencies.is_empty() {
709         create_virt_cpufreq_node(&mut fdt, num_cpus as u64)?;
710     }
711 
712     let pviommu_ids = get_pkvm_pviommu_ids(&platform_dev_resources)?;
713 
714     let cache_offset = phandles_key_cache.len();
715     // Hack to extend the lifetime of the Strings as keys of phandles (i.e. &str).
716     phandles_key_cache.extend(pviommu_ids.iter().map(|id| format!("pviommu{id}")));
717     let pviommu_phandle_keys = &phandles_key_cache[cache_offset..];
718 
719     for (index, (id, key)) in pviommu_ids.iter().zip(pviommu_phandle_keys).enumerate() {
720         let phandle = create_pkvm_pviommu_node(&mut fdt, index, *id)?;
721         phandles.insert(key, phandle);
722     }
723 
724     // Done writing base FDT, now apply DT overlays
725     apply_device_tree_overlays(
726         &mut fdt,
727         device_tree_overlays,
728         #[cfg(any(target_os = "android", target_os = "linux"))]
729         platform_dev_resources,
730         #[cfg(any(target_os = "android", target_os = "linux"))]
731         &phandles,
732     )?;
733 
734     let fdt_final = fdt.finish()?;
735 
736     if let Some(file_path) = dump_device_tree_blob {
737         std::fs::write(&file_path, &fdt_final)
738             .map_err(|e| Error::FdtDumpIoError(e, file_path.clone()))?;
739     }
740 
741     if fdt_final.len() > fdt_max_size {
742         return Err(Error::TotalSizeTooLarge);
743     }
744 
745     let written = guest_mem
746         .write_at_addr(fdt_final.as_slice(), fdt_address)
747         .map_err(|_| Error::FdtGuestMemoryWriteError)?;
748     if written < fdt_final.len() {
749         return Err(Error::FdtGuestMemoryWriteError);
750     }
751 
752     Ok(())
753 }
754 
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn psci_compatible_v0_1() {
        // v0.1 has its own, non-versioned compatible string.
        let version = PsciVersion::new(0, 1).unwrap();
        assert_eq!(psci_compatible(&version), vec!["arm,psci"]);
    }

    #[test]
    fn psci_compatible_v0_2() {
        let version = PsciVersion::new(0, 2).unwrap();
        assert_eq!(psci_compatible(&version), vec!["arm,psci-0.2"]);
    }

    #[test]
    fn psci_compatible_v0_5() {
        // Only the 0.2 version supported by the kernel should be added.
        let version = PsciVersion::new(0, 5).unwrap();
        assert_eq!(psci_compatible(&version), vec!["arm,psci-0.2"]);
    }

    #[test]
    fn psci_compatible_v1_0() {
        // Both 1.0 and 0.2 should be listed, in that order.
        let version = PsciVersion::new(1, 0).unwrap();
        assert_eq!(
            psci_compatible(&version),
            vec!["arm,psci-1.0", "arm,psci-0.2"]
        );
    }

    #[test]
    fn psci_compatible_v1_5() {
        // Only the 1.0 and 0.2 versions supported by the kernel should be listed.
        let version = PsciVersion::new(1, 5).unwrap();
        assert_eq!(
            psci_compatible(&version),
            vec!["arm,psci-1.0", "arm,psci-0.2"]
        );
    }

    #[test]
    fn symbols_entries() {
        const TEST_SYMBOL: &str = "dev";
        const TEST_PATH: &str = "/dev";

        let mut fdt = Fdt::new(&[]);

        // The target node does not exist yet.
        add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).expect_err("missing node");

        // The node exists but carries no phandle.
        fdt.root_mut().subnode_mut(TEST_SYMBOL).unwrap();
        add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).expect_err("missing phandle");

        // With a phandle in place the symbol can be registered.
        let dev_node = fdt.get_node_mut(TEST_PATH).unwrap();
        dev_node.set_prop("phandle", 1u32).unwrap();
        add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).expect("valid path");

        let symbols_node = fdt.get_node("/__symbols__").unwrap();
        assert_eq!(
            symbols_node.get_prop::<String>(TEST_SYMBOL).unwrap(),
            TEST_PATH
        );
    }
}
821