• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // We have u32 constants from bindings that are passed into architecture-dependent functions
6 // taking u32/64 parameters. So on 32 bit platforms we may have needless casts.
7 #![allow(clippy::useless_conversion)]
8 
9 use std::collections::BTreeMap;
10 use std::convert::TryFrom;
11 use std::mem::offset_of;
12 
13 use anyhow::Context;
14 use base::errno_result;
15 use base::error;
16 use base::ioctl_with_mut_ref;
17 use base::ioctl_with_ref;
18 use base::ioctl_with_val;
19 use base::warn;
20 use base::Error;
21 use base::Result;
22 use cros_fdt::Fdt;
23 use data_model::vec_with_array_field;
24 use kvm_sys::*;
25 use libc::EINVAL;
26 use libc::ENOMEM;
27 use libc::ENOTSUP;
28 use libc::ENXIO;
29 use serde::Deserialize;
30 use serde::Serialize;
31 use snapshot::AnySnapshot;
32 use vm_memory::GuestAddress;
33 
34 use super::Config;
35 use super::Kvm;
36 use super::KvmCap;
37 use super::KvmVcpu;
38 use super::KvmVm;
39 use crate::AArch64SysRegId;
40 use crate::ClockState;
41 use crate::DeviceKind;
42 use crate::Hypervisor;
43 use crate::IrqSourceChip;
44 use crate::ProtectionType;
45 use crate::PsciVersion;
46 use crate::VcpuAArch64;
47 use crate::VcpuExit;
48 use crate::VcpuFeature;
49 use crate::VcpuRegAArch64;
50 use crate::VmAArch64;
51 use crate::VmCap;
52 use crate::AARCH64_MAX_REG_COUNT;
53 use crate::PSCI_0_2;
54 
55 impl Kvm {
56     // Compute the machine type, which should be the IPA range for the VM
57     // Ideally, this would take a description of the memory map and return
58     // the closest machine type for this VM. Here, we just return the maximum
59     // the kernel support.
get_vm_type(&self, protection_type: ProtectionType) -> Result<u32>60     pub fn get_vm_type(&self, protection_type: ProtectionType) -> Result<u32> {
61         // SAFETY:
62         // Safe because we know self is a real kvm fd
63         let ipa_size = match unsafe {
64             ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into())
65         } {
66             // Not supported? Use 0 as the machine type, which implies 40bit IPA
67             ret if ret < 0 => 0,
68             ipa => ipa as u32,
69         };
70         let protection_flag = if protection_type.isolates_memory() {
71             KVM_VM_TYPE_ARM_PROTECTED
72         } else {
73             0
74         };
75         // Use the lower 8 bits representing the IPA space as the machine type
76         Ok((ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) | protection_flag)
77     }
78 
79     /// Get the size of guest physical addresses (IPA) in bits.
get_guest_phys_addr_bits(&self) -> u880     pub fn get_guest_phys_addr_bits(&self) -> u8 {
81         // SAFETY:
82         // Safe because we know self is a real kvm fd
83         match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into()) } {
84             // Default physical address size is 40 bits if the extension is not supported.
85             ret if ret <= 0 => 40,
86             ipa => ipa as u8,
87         }
88     }
89 }
90 
91 impl KvmVm {
92     /// Does platform specific initialization for the KvmVm.
init_arch(&self, cfg: &Config) -> Result<()>93     pub fn init_arch(&self, cfg: &Config) -> Result<()> {
94         #[cfg(target_arch = "aarch64")]
95         if cfg.mte {
96             // SAFETY:
97             // Safe because it does not take pointer arguments.
98             unsafe { self.enable_raw_capability(KvmCap::ArmMte, 0, &[0, 0, 0, 0])? }
99         }
100         #[cfg(not(target_arch = "aarch64"))]
101         {
102             // Suppress warning.
103             let _ = cfg;
104         }
105 
106         Ok(())
107     }
108 
109     /// Whether running under pKVM.
is_pkvm(&self) -> bool110     pub fn is_pkvm(&self) -> bool {
111         self.get_protected_vm_info().is_ok()
112     }
113 
114     /// Checks if a particular `VmCap` is available, or returns None if arch-independent
115     /// Vm.check_capability() should handle the check.
check_capability_arch(&self, _c: VmCap) -> Option<bool>116     pub fn check_capability_arch(&self, _c: VmCap) -> Option<bool> {
117         None
118     }
119 
120     /// Returns the params to pass to KVM_CREATE_DEVICE for a `kind` device on this arch, or None to
121     /// let the arch-independent `KvmVm::create_device` handle it.
get_device_params_arch(&self, kind: DeviceKind) -> Option<kvm_create_device>122     pub fn get_device_params_arch(&self, kind: DeviceKind) -> Option<kvm_create_device> {
123         match kind {
124             DeviceKind::ArmVgicV2 => Some(kvm_create_device {
125                 type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2,
126                 fd: 0,
127                 flags: 0,
128             }),
129             DeviceKind::ArmVgicV3 => Some(kvm_create_device {
130                 type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3,
131                 fd: 0,
132                 flags: 0,
133             }),
134             _ => None,
135         }
136     }
137 
138     /// Arch-specific implementation of `Vm::get_pvclock`.  Always returns an error on AArch64.
get_pvclock_arch(&self) -> Result<ClockState>139     pub fn get_pvclock_arch(&self) -> Result<ClockState> {
140         Err(Error::new(ENXIO))
141     }
142 
143     /// Arch-specific implementation of `Vm::set_pvclock`.  Always returns an error on AArch64.
set_pvclock_arch(&self, _state: &ClockState) -> Result<()>144     pub fn set_pvclock_arch(&self, _state: &ClockState) -> Result<()> {
145         Err(Error::new(ENXIO))
146     }
147 
148     /// Get pKVM hypervisor details, e.g. the firmware size.
149     ///
150     /// Returns `Err` if not running under pKVM.
151     ///
152     /// Uses `KVM_ENABLE_CAP` internally, but it is only a getter, there should be no side effects
153     /// in KVM.
get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo>154     fn get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo> {
155         let mut info = KvmProtectedVmInfo {
156             firmware_size: 0,
157             reserved: [0; 7],
158         };
159         // SAFETY:
160         // Safe because we allocated the struct and we know the kernel won't write beyond the end of
161         // the struct or keep a pointer to it.
162         unsafe {
163             self.enable_raw_capability(
164                 KvmCap::ArmProtectedVm,
165                 KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO,
166                 &[&mut info as *mut KvmProtectedVmInfo as u64, 0, 0, 0],
167             )
168         }?;
169         Ok(info)
170     }
171 
set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()>172     fn set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()> {
173         // SAFETY:
174         // Safe because none of the args are pointers.
175         unsafe {
176             self.enable_raw_capability(
177                 KvmCap::ArmProtectedVm,
178                 KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA,
179                 &[fw_addr.0, 0, 0, 0],
180             )
181         }
182     }
183 }
184 
/// Buffer whose address is passed to KVM with the `KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO` request.
///
/// The layout is `repr(C)` because the kernel accesses it through a raw pointer.
#[repr(C)]
struct KvmProtectedVmInfo {
    /// Size of the protected VM firmware; zero is treated as "no firmware" by the loader.
    firmware_size: u64,
    /// Padding words; always initialized to zero by this file.
    reserved: [u64; 7],
}
190 
191 impl VmAArch64 for KvmVm {
get_hypervisor(&self) -> &dyn Hypervisor192     fn get_hypervisor(&self) -> &dyn Hypervisor {
193         &self.kvm
194     }
195 
load_protected_vm_firmware( &mut self, fw_addr: GuestAddress, fw_max_size: u64, ) -> Result<()>196     fn load_protected_vm_firmware(
197         &mut self,
198         fw_addr: GuestAddress,
199         fw_max_size: u64,
200     ) -> Result<()> {
201         let info = self.get_protected_vm_info()?;
202         if info.firmware_size == 0 {
203             Err(Error::new(EINVAL))
204         } else {
205             if info.firmware_size > fw_max_size {
206                 return Err(Error::new(ENOMEM));
207             }
208             self.set_protected_vm_firmware_ipa(fw_addr)
209         }
210     }
211 
create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>>212     fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>> {
213         // create_vcpu is declared separately in VmAArch64 and VmX86, so it can return VcpuAArch64
214         // or VcpuX86.  But both use the same implementation in KvmVm::create_kvm_vcpu.
215         Ok(Box::new(self.create_kvm_vcpu(id)?))
216     }
217 
create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()>218     fn create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()> {
219         Ok(())
220     }
221 
init_arch( &self, _payload_entry_address: GuestAddress, _fdt_address: GuestAddress, _fdt_size: usize, ) -> Result<()>222     fn init_arch(
223         &self,
224         _payload_entry_address: GuestAddress,
225         _fdt_address: GuestAddress,
226         _fdt_size: usize,
227     ) -> Result<()> {
228         Ok(())
229     }
230 
set_counter_offset(&self, offset: u64) -> Result<()>231     fn set_counter_offset(&self, offset: u64) -> Result<()> {
232         let off = kvm_arm_counter_offset {
233             counter_offset: offset,
234             reserved: 0,
235         };
236         // SAFETY: self.vm is a valid KVM fd
237         let ret = unsafe { ioctl_with_ref(&self.vm, KVM_ARM_SET_COUNTER_OFFSET, &off) };
238         if ret != 0 {
239             return errno_result();
240         }
241         Ok(())
242     }
243 }
244 
245 impl KvmVcpu {
246     /// Handles a `KVM_EXIT_SYSTEM_EVENT` with event type `KVM_SYSTEM_EVENT_RESET` with the given
247     /// event flags and returns the appropriate `VcpuExit` value for the run loop to handle.
248     ///
249     /// `event_flags` should be one or more of the `KVM_SYSTEM_EVENT_RESET_FLAG_*` values defined by
250     /// KVM.
system_event_reset(&self, event_flags: u64) -> Result<VcpuExit>251     pub fn system_event_reset(&self, event_flags: u64) -> Result<VcpuExit> {
252         if event_flags & u64::from(KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2) != 0 {
253             // Read reset_type and cookie from x1 and x2.
254             let reset_type = self.get_one_reg(VcpuRegAArch64::X(1))?;
255             let cookie = self.get_one_reg(VcpuRegAArch64::X(2))?;
256             warn!(
257                 "PSCI SYSTEM_RESET2 with reset_type={:#x}, cookie={:#x}",
258                 reset_type, cookie
259             );
260         }
261         Ok(VcpuExit::SystemEventReset)
262     }
263 
kvm_reg_id(&self, reg: VcpuRegAArch64) -> Result<KvmVcpuRegister>264     fn kvm_reg_id(&self, reg: VcpuRegAArch64) -> Result<KvmVcpuRegister> {
265         match reg {
266             VcpuRegAArch64::X(n @ 0..=30) => Ok(KvmVcpuRegister::X(n)),
267             VcpuRegAArch64::Sp => Ok(KvmVcpuRegister::Sp),
268             VcpuRegAArch64::Pc => Ok(KvmVcpuRegister::Pc),
269             VcpuRegAArch64::Pstate => Ok(KvmVcpuRegister::Pstate),
270             // Special case for multiplexed KVM registers
271             VcpuRegAArch64::System(AArch64SysRegId::CCSIDR_EL1) => {
272                 let csselr =
273                     self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::CSSELR_EL1))?;
274                 Ok(KvmVcpuRegister::Ccsidr(csselr as u8))
275             }
276             VcpuRegAArch64::System(sysreg) => Ok(KvmVcpuRegister::System(sysreg)),
277             _ => Err(Error::new(EINVAL)),
278         }
279     }
280 
set_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister, data: u32) -> Result<()>281     fn set_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister, data: u32) -> Result<()> {
282         self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
283     }
284 
set_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister, data: u64) -> Result<()>285     fn set_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister, data: u64) -> Result<()> {
286         self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
287     }
288 
set_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister, data: u128) -> Result<()>289     fn set_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister, data: u128) -> Result<()> {
290         self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
291     }
292 
set_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &[u8]) -> Result<()>293     fn set_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &[u8]) -> Result<()> {
294         assert_eq!(kvm_reg_id.size(), data.len());
295         let id: u64 = kvm_reg_id.into();
296         let onereg = kvm_one_reg {
297             id,
298             addr: (data.as_ptr() as usize)
299                 .try_into()
300                 .expect("can't represent usize as u64"),
301         };
302         // SAFETY:
303         // Safe because we allocated the struct and we know the kernel will read exactly the size of
304         // the struct.
305         let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG, &onereg) };
306         if ret == 0 {
307             Ok(())
308         } else {
309             errno_result()
310         }
311     }
312 
get_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u32>313     fn get_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u32> {
314         let mut bytes = 0u32.to_ne_bytes();
315         self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
316         Ok(u32::from_ne_bytes(bytes))
317     }
318 
get_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u64>319     fn get_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u64> {
320         let mut bytes = 0u64.to_ne_bytes();
321         self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
322         Ok(u64::from_ne_bytes(bytes))
323     }
324 
get_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u128>325     fn get_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u128> {
326         let mut bytes = 0u128.to_ne_bytes();
327         self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
328         Ok(u128::from_ne_bytes(bytes))
329     }
330 
get_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &mut [u8]) -> Result<()>331     fn get_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &mut [u8]) -> Result<()> {
332         assert_eq!(kvm_reg_id.size(), data.len());
333         let id: u64 = kvm_reg_id.into();
334         let onereg = kvm_one_reg {
335             id,
336             addr: (data.as_mut_ptr() as usize)
337                 .try_into()
338                 .expect("can't represent usize as u64"),
339         };
340 
341         // SAFETY:
342         // Safe because we allocated the struct and we know the kernel will read exactly the size of
343         // the struct.
344         let ret = unsafe { ioctl_with_ref(self, KVM_GET_ONE_REG, &onereg) };
345         if ret == 0 {
346             Ok(())
347         } else {
348             errno_result()
349         }
350     }
351 
352     #[inline]
handle_vm_exit_arch(&self, _run: &mut kvm_run) -> Option<VcpuExit>353     pub(crate) fn handle_vm_exit_arch(&self, _run: &mut kvm_run) -> Option<VcpuExit> {
354         // No aarch64-specific exits (for now)
355         None
356     }
357 
get_reg_list(&self) -> Result<Vec<u64>>358     fn get_reg_list(&self) -> Result<Vec<u64>> {
359         let mut kvm_reg_list = vec_with_array_field::<kvm_reg_list, u64>(AARCH64_MAX_REG_COUNT);
360         kvm_reg_list[0].n = AARCH64_MAX_REG_COUNT as u64;
361         let ret =
362             // SAFETY:
363             // We trust the kernel not to read/write past the end of kvm_reg_list struct.
364             unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST, &mut kvm_reg_list[0]) };
365         if ret < 0 {
366             return errno_result();
367         }
368         let n = kvm_reg_list[0].n;
369         assert!(
370             n <= AARCH64_MAX_REG_COUNT as u64,
371             "Get reg list returned more registers than possible"
372         );
373         // SAFETY:
374         // Mapping the unsized array to a slice is unsafe because the length isn't known.
375         // Providing the length used to create the struct guarantees the entire slice is valid.
376         let reg_list: &[u64] = unsafe { kvm_reg_list[0].reg.as_slice(n as usize) };
377         Ok(reg_list.to_vec())
378     }
379 
get_features_bitmap(&self, features: &[VcpuFeature]) -> Result<u32>380     fn get_features_bitmap(&self, features: &[VcpuFeature]) -> Result<u32> {
381         let mut all_features = 0;
382         let check_extension = |ext: u32| -> bool {
383             // SAFETY:
384             // Safe because we know self.vm is a real kvm fd
385             unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION, ext.into()) == 1 }
386         };
387 
388         for f in features {
389             let shift = match f {
390                 VcpuFeature::PsciV0_2 => KVM_ARM_VCPU_PSCI_0_2,
391                 VcpuFeature::PmuV3 => KVM_ARM_VCPU_PMU_V3,
392                 VcpuFeature::PowerOff => KVM_ARM_VCPU_POWER_OFF,
393                 VcpuFeature::Sve => {
394                     if !check_extension(KVM_CAP_ARM_SVE) {
395                         return Err(Error::new(ENOTSUP));
396                     }
397                     KVM_ARM_VCPU_SVE
398                 }
399             };
400             all_features |= 1 << shift;
401         }
402 
403         if check_extension(KVM_CAP_ARM_PTRAUTH_ADDRESS)
404             && check_extension(KVM_CAP_ARM_PTRAUTH_GENERIC)
405         {
406             all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS;
407             all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC;
408         }
409 
410         Ok(all_features)
411     }
412 
413     /// Finalize VCPU features setup. This does not affect features that do not make use of
414     /// finalize.
finalize(&self, features: u32) -> Result<()>415     fn finalize(&self, features: u32) -> Result<()> {
416         if (features & 1 << KVM_ARM_VCPU_SVE) != 0 {
417             // SAFETY:
418             // Safe because we know that our file is a Vcpu fd and we verify the return result.
419             let ret = unsafe {
420                 ioctl_with_ref(
421                     self,
422                     KVM_ARM_VCPU_FINALIZE,
423                     &std::os::raw::c_int::try_from(KVM_ARM_VCPU_SVE)
424                         .map_err(|_| Error::new(EINVAL))?,
425                 )
426             };
427             if ret != 0 {
428                 return errno_result();
429             }
430         }
431 
432         Ok(())
433     }
434 }
435 
436 /// KVM registers as used by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API
437 ///
438 /// These variants represent the registers as exposed by KVM which must be different from
439 /// `VcpuRegAArch64` to support registers which don't have an architectural definition such as
440 /// pseudo-registers (`Firmware`) and multiplexed registers (`Ccsidr`).
441 ///
442 /// See https://docs.kernel.org/virt/kvm/api.html for more details.
443 #[derive(Debug, Copy, Clone, Serialize, Deserialize)]
444 pub enum KvmVcpuRegister {
445     /// General Purpose Registers X0-X30
446     X(u8),
447     /// Stack Pointer
448     Sp,
449     /// Program Counter
450     Pc,
451     /// Processor State
452     Pstate,
453     /// FP & SIMD Registers V0-V31
454     V(u8),
455     /// KVM Firmware Pseudo-Registers
456     Firmware(u16),
457     /// System Registers
458     System(AArch64SysRegId),
459     /// CCSIDR_EL1 Demultiplexed by CSSELR_EL1
460     Ccsidr(u8),
461 }
462 
463 impl KvmVcpuRegister {
464     // Firmware pseudo-registers are part of the ARM KVM interface:
465     //     https://docs.kernel.org/virt/kvm/arm/hypercalls.html
466     pub const PSCI_VERSION: Self = Self::Firmware(0);
467     pub const SMCCC_ARCH_WORKAROUND_1: Self = Self::Firmware(1);
468     pub const SMCCC_ARCH_WORKAROUND_2: Self = Self::Firmware(2);
469     pub const SMCCC_ARCH_WORKAROUND_3: Self = Self::Firmware(3);
470 
471     /// Size of this register in bytes.
size(&self) -> usize472     pub fn size(&self) -> usize {
473         let kvm_reg = u64::from(*self);
474         let size_field = kvm_reg & KVM_REG_SIZE_MASK;
475         const REG_SIZE_U8: u64 = KVM_REG_SIZE_U8 as u64; // cast from bindgen's u32 to u64
476         match size_field {
477             REG_SIZE_U8 => 1,
478             KVM_REG_SIZE_U16 => 2,
479             KVM_REG_SIZE_U32 => 4,
480             KVM_REG_SIZE_U64 => 8,
481             KVM_REG_SIZE_U128 => 16,
482             KVM_REG_SIZE_U256 => 32,
483             KVM_REG_SIZE_U512 => 64,
484             KVM_REG_SIZE_U1024 => 128,
485             KVM_REG_SIZE_U2048 => 256,
486             // `From<KvmVcpuRegister> for u64` should always include a valid size.
487             _ => panic!("invalid size field {}", size_field),
488         }
489     }
490 }
491 
492 /// Gives the `u64` register ID expected by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API.
493 ///
494 /// See the KVM documentation of those ioctls for details about the format of the register ID.
495 impl From<KvmVcpuRegister> for u64 {
from(register: KvmVcpuRegister) -> Self496     fn from(register: KvmVcpuRegister) -> Self {
497         const fn reg(size: u64, kind: u64, fields: u64) -> u64 {
498             KVM_REG_ARM64 | size | kind | fields
499         }
500 
501         const fn kvm_regs_reg(size: u64, offset: usize) -> u64 {
502             let offset = offset / std::mem::size_of::<u32>();
503 
504             reg(size, KVM_REG_ARM_CORE as u64, offset as u64)
505         }
506 
507         const fn kvm_reg(offset: usize) -> u64 {
508             kvm_regs_reg(KVM_REG_SIZE_U64, offset)
509         }
510 
511         fn spsr_reg(spsr_reg: u32) -> u64 {
512             let n = std::mem::size_of::<u64>() * (spsr_reg as usize);
513             kvm_reg(offset_of!(kvm_regs, spsr) + n)
514         }
515 
516         fn user_pt_reg(offset: usize) -> u64 {
517             kvm_regs_reg(KVM_REG_SIZE_U64, offset_of!(kvm_regs, regs) + offset)
518         }
519 
520         fn user_fpsimd_state_reg(size: u64, offset: usize) -> u64 {
521             kvm_regs_reg(size, offset_of!(kvm_regs, fp_regs) + offset)
522         }
523 
524         const fn reg_u64(kind: u64, fields: u64) -> u64 {
525             reg(KVM_REG_SIZE_U64, kind, fields)
526         }
527 
528         const fn demux_reg(size: u64, index: u64, value: u64) -> u64 {
529             let index = (index << KVM_REG_ARM_DEMUX_ID_SHIFT) & (KVM_REG_ARM_DEMUX_ID_MASK as u64);
530             let value =
531                 (value << KVM_REG_ARM_DEMUX_VAL_SHIFT) & (KVM_REG_ARM_DEMUX_VAL_MASK as u64);
532 
533             reg(size, KVM_REG_ARM_DEMUX as u64, index | value)
534         }
535 
536         match register {
537             KvmVcpuRegister::X(n @ 0..=30) => {
538                 let n = std::mem::size_of::<u64>() * (n as usize);
539 
540                 user_pt_reg(offset_of!(user_pt_regs, regs) + n)
541             }
542             KvmVcpuRegister::X(n) => unreachable!("invalid KvmVcpuRegister Xn index: {n}"),
543             KvmVcpuRegister::Sp => user_pt_reg(offset_of!(user_pt_regs, sp)),
544             KvmVcpuRegister::Pc => user_pt_reg(offset_of!(user_pt_regs, pc)),
545             KvmVcpuRegister::Pstate => user_pt_reg(offset_of!(user_pt_regs, pstate)),
546             KvmVcpuRegister::V(n @ 0..=31) => {
547                 let n = std::mem::size_of::<u128>() * (n as usize);
548 
549                 user_fpsimd_state_reg(KVM_REG_SIZE_U128, offset_of!(user_fpsimd_state, vregs) + n)
550             }
551             KvmVcpuRegister::V(n) => unreachable!("invalid KvmVcpuRegister Vn index: {n}"),
552             KvmVcpuRegister::System(AArch64SysRegId::FPSR) => {
553                 user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpsr))
554             }
555             KvmVcpuRegister::System(AArch64SysRegId::FPCR) => {
556                 user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpcr))
557             }
558             KvmVcpuRegister::System(AArch64SysRegId::SPSR_EL1) => spsr_reg(KVM_SPSR_EL1),
559             KvmVcpuRegister::System(AArch64SysRegId::SPSR_abt) => spsr_reg(KVM_SPSR_ABT),
560             KvmVcpuRegister::System(AArch64SysRegId::SPSR_und) => spsr_reg(KVM_SPSR_UND),
561             KvmVcpuRegister::System(AArch64SysRegId::SPSR_irq) => spsr_reg(KVM_SPSR_IRQ),
562             KvmVcpuRegister::System(AArch64SysRegId::SPSR_fiq) => spsr_reg(KVM_SPSR_FIQ),
563             KvmVcpuRegister::System(AArch64SysRegId::SP_EL1) => {
564                 kvm_reg(offset_of!(kvm_regs, sp_el1))
565             }
566             KvmVcpuRegister::System(AArch64SysRegId::ELR_EL1) => {
567                 kvm_reg(offset_of!(kvm_regs, elr_el1))
568             }
569             // The KVM API accidentally swapped CNTV_CVAL_EL0 and CNTVCT_EL0.
570             KvmVcpuRegister::System(AArch64SysRegId::CNTV_CVAL_EL0) => reg_u64(
571                 KVM_REG_ARM64_SYSREG.into(),
572                 AArch64SysRegId::CNTVCT_EL0.encoded().into(),
573             ),
574             KvmVcpuRegister::System(AArch64SysRegId::CNTVCT_EL0) => reg_u64(
575                 KVM_REG_ARM64_SYSREG.into(),
576                 AArch64SysRegId::CNTV_CVAL_EL0.encoded().into(),
577             ),
578             KvmVcpuRegister::System(sysreg) => {
579                 reg_u64(KVM_REG_ARM64_SYSREG.into(), sysreg.encoded().into())
580             }
581             KvmVcpuRegister::Firmware(n) => reg_u64(KVM_REG_ARM_FW.into(), n.into()),
582             KvmVcpuRegister::Ccsidr(n) => demux_reg(KVM_REG_SIZE_U32, 0, n.into()),
583         }
584     }
585 }
586 
587 impl VcpuAArch64 for KvmVcpu {
init(&self, features: &[VcpuFeature]) -> Result<()>588     fn init(&self, features: &[VcpuFeature]) -> Result<()> {
589         let mut kvi = kvm_vcpu_init {
590             target: KVM_ARM_TARGET_GENERIC_V8,
591             features: [0; 7],
592         };
593         // SAFETY:
594         // Safe because we allocated the struct and we know the kernel will write exactly the size
595         // of the struct.
596         let ret = unsafe { ioctl_with_mut_ref(&self.vm, KVM_ARM_PREFERRED_TARGET, &mut kvi) };
597         if ret != 0 {
598             return errno_result();
599         }
600 
601         kvi.features[0] = self.get_features_bitmap(features)?;
602         // SAFETY:
603         // Safe because we allocated the struct and we know the kernel will read exactly the size of
604         // the struct.
605         let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT, &kvi) };
606         if ret != 0 {
607             return errno_result();
608         }
609 
610         self.finalize(kvi.features[0])?;
611         Ok(())
612     }
613 
init_pmu(&self, irq: u64) -> Result<()>614     fn init_pmu(&self, irq: u64) -> Result<()> {
615         let irq_addr = &irq as *const u64;
616 
617         // The in-kernel PMU virtualization is initialized by setting the irq
618         // with KVM_ARM_VCPU_PMU_V3_IRQ and then by KVM_ARM_VCPU_PMU_V3_INIT.
619 
620         let irq_attr = kvm_device_attr {
621             group: KVM_ARM_VCPU_PMU_V3_CTRL,
622             attr: KVM_ARM_VCPU_PMU_V3_IRQ as u64,
623             addr: irq_addr as u64,
624             flags: 0,
625         };
626         // SAFETY:
627         // Safe because we allocated the struct and we know the kernel will read exactly the size of
628         // the struct.
629         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &irq_attr) };
630         if ret < 0 {
631             return errno_result();
632         }
633 
634         // SAFETY:
635         // Safe because we allocated the struct and we know the kernel will read exactly the size of
636         // the struct.
637         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &irq_attr) };
638         if ret < 0 {
639             return errno_result();
640         }
641 
642         let init_attr = kvm_device_attr {
643             group: KVM_ARM_VCPU_PMU_V3_CTRL,
644             attr: KVM_ARM_VCPU_PMU_V3_INIT as u64,
645             addr: 0,
646             flags: 0,
647         };
648         // SAFETY:
649         // Safe because we allocated the struct and we know the kernel will read exactly the size of
650         // the struct.
651         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &init_attr) };
652         if ret < 0 {
653             return errno_result();
654         }
655 
656         Ok(())
657     }
658 
has_pvtime_support(&self) -> bool659     fn has_pvtime_support(&self) -> bool {
660         // The in-kernel PV time structure is initialized by setting the base
661         // address with KVM_ARM_VCPU_PVTIME_IPA
662         let pvtime_attr = kvm_device_attr {
663             group: KVM_ARM_VCPU_PVTIME_CTRL,
664             attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
665             addr: 0,
666             flags: 0,
667         };
668         // SAFETY:
669         // Safe because we allocated the struct and we know the kernel will read exactly the size of
670         // the struct.
671         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &pvtime_attr) };
672         ret >= 0
673     }
674 
init_pvtime(&self, pvtime_ipa: u64) -> Result<()>675     fn init_pvtime(&self, pvtime_ipa: u64) -> Result<()> {
676         let pvtime_ipa_addr = &pvtime_ipa as *const u64;
677 
678         // The in-kernel PV time structure is initialized by setting the base
679         // address with KVM_ARM_VCPU_PVTIME_IPA
680         let pvtime_attr = kvm_device_attr {
681             group: KVM_ARM_VCPU_PVTIME_CTRL,
682             attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
683             addr: pvtime_ipa_addr as u64,
684             flags: 0,
685         };
686 
687         // SAFETY:
688         // Safe because we allocated the struct and we know the kernel will read exactly the size of
689         // the struct.
690         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &pvtime_attr) };
691         if ret < 0 {
692             return errno_result();
693         }
694 
695         Ok(())
696     }
697 
set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()>698     fn set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()> {
699         let kvm_reg = self.kvm_reg_id(reg_id)?;
700         match kvm_reg.size() {
701             4 => self.set_one_kvm_reg_u32(kvm_reg, data as u32),
702             8 => self.set_one_kvm_reg_u64(kvm_reg, data),
703             size => panic!("bad reg size {size}"),
704         }
705     }
706 
get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64>707     fn get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64> {
708         let kvm_reg = self.kvm_reg_id(reg_id)?;
709         match kvm_reg.size() {
710             4 => self.get_one_kvm_reg_u32(kvm_reg).map(u64::from),
711             8 => self.get_one_kvm_reg_u64(kvm_reg),
712             size => panic!("bad reg size {size}"),
713         }
714     }
715 
set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()>716     fn set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()> {
717         if reg_num > 31 {
718             return Err(Error::new(EINVAL));
719         }
720         self.set_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num), data)
721     }
722 
get_vector_reg(&self, reg_num: u8) -> Result<u128>723     fn get_vector_reg(&self, reg_num: u8) -> Result<u128> {
724         if reg_num > 31 {
725             return Err(Error::new(EINVAL));
726         }
727         self.get_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num))
728     }
729 
get_mpidr(&self) -> Result<u64>730     fn get_mpidr(&self) -> Result<u64> {
731         self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::MPIDR_EL1))
732     }
733 
get_psci_version(&self) -> Result<PsciVersion>734     fn get_psci_version(&self) -> Result<PsciVersion> {
735         let version = if let Ok(v) = self.get_one_kvm_reg_u64(KvmVcpuRegister::PSCI_VERSION) {
736             let v = u32::try_from(v).map_err(|_| Error::new(EINVAL))?;
737             PsciVersion::try_from(v)?
738         } else {
739             // When `KVM_REG_ARM_PSCI_VERSION` is not supported, we can return PSCI 0.2, as vCPU
740             // has been initialized with `KVM_ARM_VCPU_PSCI_0_2` successfully.
741             PSCI_0_2
742         };
743 
744         if version < PSCI_0_2 {
745             // PSCI v0.1 isn't currently supported for guests
746             Err(Error::new(ENOTSUP))
747         } else {
748             Ok(version)
749         }
750     }
751 
get_max_hw_bps(&self) -> Result<usize>752     fn get_max_hw_bps(&self) -> Result<usize> {
753         // SAFETY:
754         // Safe because the kernel will only return the result of the ioctl.
755         let max_hw_bps = unsafe {
756             ioctl_with_val(
757                 &self.vm,
758                 KVM_CHECK_EXTENSION,
759                 KVM_CAP_GUEST_DEBUG_HW_BPS.into(),
760             )
761         };
762 
763         if max_hw_bps < 0 {
764             errno_result()
765         } else {
766             Ok(max_hw_bps.try_into().expect("can't represent u64 as usize"))
767         }
768     }
769 
get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>>770     fn get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>> {
771         let reg_list = self.get_reg_list()?;
772         let cntvct_el0: u16 = AArch64SysRegId::CNTVCT_EL0.encoded();
773         let cntv_cval_el0: u16 = AArch64SysRegId::CNTV_CVAL_EL0.encoded();
774         let mut sys_regs = BTreeMap::new();
775         for reg in reg_list {
776             if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM64_SYSREG {
777                 let r = if reg as u16 == cntvct_el0 {
778                     AArch64SysRegId::CNTV_CVAL_EL0
779                 } else if reg as u16 == cntv_cval_el0 {
780                     AArch64SysRegId::CNTVCT_EL0
781                 } else {
782                     AArch64SysRegId::from_encoded((reg & 0xFFFF) as u16)
783                 };
784                 sys_regs.insert(r, self.get_one_reg(VcpuRegAArch64::System(r))?);
785                 // The register representations are tricky. Double check they round trip correctly.
786                 assert_eq!(
787                     Ok(reg),
788                     self.kvm_reg_id(VcpuRegAArch64::System(r)).map(u64::from),
789                 );
790             }
791         }
792         Ok(sys_regs)
793     }
794 
get_cache_info(&self) -> Result<BTreeMap<u8, u64>>795     fn get_cache_info(&self) -> Result<BTreeMap<u8, u64>> {
796         const KVM_REG_CCSIDR: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | (KVM_REG_ARM_DEMUX as u64);
797         const CCSIDR_INDEX_MASK: u64 = 0xFF;
798         let reg_list = self.get_reg_list()?;
799         let mut cache_info = BTreeMap::new();
800         for reg in reg_list {
801             if (reg & !CCSIDR_INDEX_MASK) == KVM_REG_CCSIDR {
802                 let idx = reg as u8;
803                 cache_info.insert(
804                     idx,
805                     self.get_one_kvm_reg_u32(KvmVcpuRegister::Ccsidr(idx))?
806                         .into(),
807                 );
808             }
809         }
810         Ok(cache_info)
811     }
812 
set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()>813     fn set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()> {
814         for (idx, val) in cache_info {
815             self.set_one_kvm_reg_u32(
816                 KvmVcpuRegister::Ccsidr(idx),
817                 val.try_into()
818                     .expect("trying to set a u32 register with a u64 value"),
819             )?;
820         }
821         Ok(())
822     }
823 
hypervisor_specific_snapshot(&self) -> anyhow::Result<AnySnapshot>824     fn hypervisor_specific_snapshot(&self) -> anyhow::Result<AnySnapshot> {
825         let reg_list = self.get_reg_list()?;
826         let mut firmware_regs = BTreeMap::new();
827         for reg in reg_list {
828             if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM_FW {
829                 firmware_regs.insert(
830                     reg as u16,
831                     self.get_one_kvm_reg_u64(KvmVcpuRegister::Firmware(reg as u16))?,
832                 );
833             }
834         }
835 
836         AnySnapshot::to_any(KvmSnapshot { firmware_regs })
837             .context("Failed to serialize KVM specific data")
838     }
839 
hypervisor_specific_restore(&self, data: AnySnapshot) -> anyhow::Result<()>840     fn hypervisor_specific_restore(&self, data: AnySnapshot) -> anyhow::Result<()> {
841         let deser: KvmSnapshot =
842             AnySnapshot::from_any(data).context("Failed to deserialize KVM specific data")?;
843         // TODO: need to set firmware registers before "create_fdt" is called, earlier in the
844         // stack.
845         for (id, val) in &deser.firmware_regs {
846             self.set_one_kvm_reg_u64(KvmVcpuRegister::Firmware(*id), *val)?;
847         }
848         Ok(())
849     }
850 
851     #[allow(clippy::unusual_byte_groupings)]
set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>852     fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()> {
853         let mut dbg = kvm_guest_debug {
854             control: KVM_GUESTDBG_ENABLE,
855             ..Default::default()
856         };
857 
858         if enable_singlestep {
859             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
860         }
861         if !addrs.is_empty() {
862             dbg.control |= KVM_GUESTDBG_USE_HW;
863         }
864 
865         for (i, guest_addr) in addrs.iter().enumerate() {
866             // From the ARMv8 Architecture Reference Manual (DDI0487H.a) D31.3.{2,3}:
867             // When DBGBCR<n>_EL1.BT == 0b000x:
868             //      DBGBVR<n>_EL1, Bits [1:0]: Reserved, RES0
869             if guest_addr.0 & 0b11 != 0 {
870                 return Err(Error::new(EINVAL));
871             }
872             let sign_ext = 15;
873             //      DBGBVR<n>_EL1.RESS[14:0], bits [63:49]: Reserved, Sign extended
874             dbg.arch.dbg_bvr[i] = (((guest_addr.0 << sign_ext) as i64) >> sign_ext) as u64;
875             // DBGBCR<n>_EL1.BT, bits [23:20]: Breakpoint Type
876             //      0b0000: Unlinked instruction address match.
877             //              DBGBVR<n>_EL1 is the address of an instruction.
878             // DBGBCR<n>_EL1.BAS, bits [8:5]: Byte address select
879             //      0b1111: Use for A64 and A32 instructions
880             // DBGBCR<n>_EL1.PMC, bits [2:1]: Privilege mode control
881             //      0b11: EL1 & EL0
882             // DBGBCR<n>_EL1.E, bit [0]: Enable breakpoint
883             //      0b1: Enabled
884             dbg.arch.dbg_bcr[i] = 0b1111_11_1;
885         }
886 
887         // SAFETY:
888         // Safe because the kernel won't read past the end of the kvm_guest_debug struct.
889         let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG, &dbg) };
890         if ret == 0 {
891             Ok(())
892         } else {
893             errno_result()
894         }
895     }
896 }
897 
898 #[derive(Debug, Serialize, Deserialize)]
899 struct KvmSnapshot {
900     firmware_regs: BTreeMap<u16, u64>,
901 }
902 
903 // This function translates an IrqSrouceChip to the kvm u32 equivalent. It has a different
904 // implementation between x86_64 and aarch64 because the irqchip KVM constants are not defined on
905 // all architectures.
chip_to_kvm_chip(chip: IrqSourceChip) -> u32906 pub(super) fn chip_to_kvm_chip(chip: IrqSourceChip) -> u32 {
907     match chip {
908         // ARM does not have a constant for this, but the default routing
909         // setup seems to set this to 0
910         IrqSourceChip::Gic => 0,
911         _ => {
912             error!("Invalid IrqChipSource for ARM {:?}", chip);
913             0
914         }
915     }
916 }
917 
#[cfg(test)]
mod tests {
    use super::*;

    // Per https://docs.kernel.org/virt/kvm/api.html ARM64 system register encoding docs,
    // KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT were accidentally defined backwards.
    // Ensure the AArch64SysRegId to KvmVcpuRegister encoding maps these to the expected
    // values.
    #[test]
    fn system_timer_register_mixup() {
        const KVM_REG_ARM_TIMER_CVAL: u64 = 0x6030_0000_0013_DF02;
        const KVM_REG_ARM_TIMER_CNT: u64 = 0x6030_0000_0013_DF1A;

        // Each system register paired with the KVM id it must encode to.
        let expectations = [
            (AArch64SysRegId::CNTV_CVAL_EL0, KVM_REG_ARM_TIMER_CVAL),
            (AArch64SysRegId::CNTVCT_EL0, KVM_REG_ARM_TIMER_CNT),
        ];

        for (sys_reg, expected_kvm_id) in expectations {
            let encoded = u64::from(KvmVcpuRegister::System(sys_reg));
            assert_eq!(encoded, expected_kvm_id);
        }
    }
}
938