1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
// We have u32 constants from bindings that are passed into architecture-dependent functions
6 // taking u32/64 parameters. So on 32 bit platforms we may have needless casts.
7 #![allow(clippy::useless_conversion)]
8
9 use std::collections::BTreeMap;
10 use std::convert::TryFrom;
11 use std::mem::offset_of;
12
13 use anyhow::Context;
14 use base::errno_result;
15 use base::error;
16 use base::ioctl_with_mut_ref;
17 use base::ioctl_with_ref;
18 use base::ioctl_with_val;
19 use base::warn;
20 use base::Error;
21 use base::Result;
22 use cros_fdt::Fdt;
23 use data_model::vec_with_array_field;
24 use kvm_sys::*;
25 use libc::EINVAL;
26 use libc::ENOMEM;
27 use libc::ENOTSUP;
28 use libc::ENXIO;
29 use serde::Deserialize;
30 use serde::Serialize;
31 use snapshot::AnySnapshot;
32 use vm_memory::GuestAddress;
33
34 use super::Config;
35 use super::Kvm;
36 use super::KvmCap;
37 use super::KvmVcpu;
38 use super::KvmVm;
39 use crate::AArch64SysRegId;
40 use crate::ClockState;
41 use crate::DeviceKind;
42 use crate::Hypervisor;
43 use crate::IrqSourceChip;
44 use crate::ProtectionType;
45 use crate::PsciVersion;
46 use crate::VcpuAArch64;
47 use crate::VcpuExit;
48 use crate::VcpuFeature;
49 use crate::VcpuRegAArch64;
50 use crate::VmAArch64;
51 use crate::VmCap;
52 use crate::AARCH64_MAX_REG_COUNT;
53 use crate::PSCI_0_2;
54
55 impl Kvm {
56 // Compute the machine type, which should be the IPA range for the VM
57 // Ideally, this would take a description of the memory map and return
58 // the closest machine type for this VM. Here, we just return the maximum
59 // the kernel support.
get_vm_type(&self, protection_type: ProtectionType) -> Result<u32>60 pub fn get_vm_type(&self, protection_type: ProtectionType) -> Result<u32> {
61 // SAFETY:
62 // Safe because we know self is a real kvm fd
63 let ipa_size = match unsafe {
64 ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into())
65 } {
66 // Not supported? Use 0 as the machine type, which implies 40bit IPA
67 ret if ret < 0 => 0,
68 ipa => ipa as u32,
69 };
70 let protection_flag = if protection_type.isolates_memory() {
71 KVM_VM_TYPE_ARM_PROTECTED
72 } else {
73 0
74 };
75 // Use the lower 8 bits representing the IPA space as the machine type
76 Ok((ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) | protection_flag)
77 }
78
79 /// Get the size of guest physical addresses (IPA) in bits.
get_guest_phys_addr_bits(&self) -> u880 pub fn get_guest_phys_addr_bits(&self) -> u8 {
81 // SAFETY:
82 // Safe because we know self is a real kvm fd
83 match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into()) } {
84 // Default physical address size is 40 bits if the extension is not supported.
85 ret if ret <= 0 => 40,
86 ipa => ipa as u8,
87 }
88 }
89 }
90
91 impl KvmVm {
92 /// Does platform specific initialization for the KvmVm.
init_arch(&self, cfg: &Config) -> Result<()>93 pub fn init_arch(&self, cfg: &Config) -> Result<()> {
94 #[cfg(target_arch = "aarch64")]
95 if cfg.mte {
96 // SAFETY:
97 // Safe because it does not take pointer arguments.
98 unsafe { self.enable_raw_capability(KvmCap::ArmMte, 0, &[0, 0, 0, 0])? }
99 }
100 #[cfg(not(target_arch = "aarch64"))]
101 {
102 // Suppress warning.
103 let _ = cfg;
104 }
105
106 Ok(())
107 }
108
109 /// Whether running under pKVM.
is_pkvm(&self) -> bool110 pub fn is_pkvm(&self) -> bool {
111 self.get_protected_vm_info().is_ok()
112 }
113
114 /// Checks if a particular `VmCap` is available, or returns None if arch-independent
115 /// Vm.check_capability() should handle the check.
check_capability_arch(&self, _c: VmCap) -> Option<bool>116 pub fn check_capability_arch(&self, _c: VmCap) -> Option<bool> {
117 None
118 }
119
120 /// Returns the params to pass to KVM_CREATE_DEVICE for a `kind` device on this arch, or None to
121 /// let the arch-independent `KvmVm::create_device` handle it.
get_device_params_arch(&self, kind: DeviceKind) -> Option<kvm_create_device>122 pub fn get_device_params_arch(&self, kind: DeviceKind) -> Option<kvm_create_device> {
123 match kind {
124 DeviceKind::ArmVgicV2 => Some(kvm_create_device {
125 type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2,
126 fd: 0,
127 flags: 0,
128 }),
129 DeviceKind::ArmVgicV3 => Some(kvm_create_device {
130 type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3,
131 fd: 0,
132 flags: 0,
133 }),
134 _ => None,
135 }
136 }
137
138 /// Arch-specific implementation of `Vm::get_pvclock`. Always returns an error on AArch64.
get_pvclock_arch(&self) -> Result<ClockState>139 pub fn get_pvclock_arch(&self) -> Result<ClockState> {
140 Err(Error::new(ENXIO))
141 }
142
143 /// Arch-specific implementation of `Vm::set_pvclock`. Always returns an error on AArch64.
set_pvclock_arch(&self, _state: &ClockState) -> Result<()>144 pub fn set_pvclock_arch(&self, _state: &ClockState) -> Result<()> {
145 Err(Error::new(ENXIO))
146 }
147
148 /// Get pKVM hypervisor details, e.g. the firmware size.
149 ///
150 /// Returns `Err` if not running under pKVM.
151 ///
152 /// Uses `KVM_ENABLE_CAP` internally, but it is only a getter, there should be no side effects
153 /// in KVM.
get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo>154 fn get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo> {
155 let mut info = KvmProtectedVmInfo {
156 firmware_size: 0,
157 reserved: [0; 7],
158 };
159 // SAFETY:
160 // Safe because we allocated the struct and we know the kernel won't write beyond the end of
161 // the struct or keep a pointer to it.
162 unsafe {
163 self.enable_raw_capability(
164 KvmCap::ArmProtectedVm,
165 KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO,
166 &[&mut info as *mut KvmProtectedVmInfo as u64, 0, 0, 0],
167 )
168 }?;
169 Ok(info)
170 }
171
set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()>172 fn set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()> {
173 // SAFETY:
174 // Safe because none of the args are pointers.
175 unsafe {
176 self.enable_raw_capability(
177 KvmCap::ArmProtectedVm,
178 KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA,
179 &[fw_addr.0, 0, 0, 0],
180 )
181 }
182 }
183 }
184
/// Output buffer for the protected VM info query issued by `KvmVm::get_protected_vm_info`.
///
/// A pointer to this struct is passed to the kernel as a raw capability argument, hence the
/// `#[repr(C)]` layout.
#[repr(C)]
struct KvmProtectedVmInfo {
    /// Firmware size reported by the hypervisor; compared against the caller's maximum size
    /// before the firmware address is set.
    firmware_size: u64,
    /// Reserved space; always zero-initialized by this file.
    reserved: [u64; 7],
}
190
191 impl VmAArch64 for KvmVm {
get_hypervisor(&self) -> &dyn Hypervisor192 fn get_hypervisor(&self) -> &dyn Hypervisor {
193 &self.kvm
194 }
195
load_protected_vm_firmware( &mut self, fw_addr: GuestAddress, fw_max_size: u64, ) -> Result<()>196 fn load_protected_vm_firmware(
197 &mut self,
198 fw_addr: GuestAddress,
199 fw_max_size: u64,
200 ) -> Result<()> {
201 let info = self.get_protected_vm_info()?;
202 if info.firmware_size == 0 {
203 Err(Error::new(EINVAL))
204 } else {
205 if info.firmware_size > fw_max_size {
206 return Err(Error::new(ENOMEM));
207 }
208 self.set_protected_vm_firmware_ipa(fw_addr)
209 }
210 }
211
create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>>212 fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>> {
213 // create_vcpu is declared separately in VmAArch64 and VmX86, so it can return VcpuAArch64
214 // or VcpuX86. But both use the same implementation in KvmVm::create_kvm_vcpu.
215 Ok(Box::new(self.create_kvm_vcpu(id)?))
216 }
217
create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()>218 fn create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()> {
219 Ok(())
220 }
221
init_arch( &self, _payload_entry_address: GuestAddress, _fdt_address: GuestAddress, _fdt_size: usize, ) -> Result<()>222 fn init_arch(
223 &self,
224 _payload_entry_address: GuestAddress,
225 _fdt_address: GuestAddress,
226 _fdt_size: usize,
227 ) -> Result<()> {
228 Ok(())
229 }
230
set_counter_offset(&self, offset: u64) -> Result<()>231 fn set_counter_offset(&self, offset: u64) -> Result<()> {
232 let off = kvm_arm_counter_offset {
233 counter_offset: offset,
234 reserved: 0,
235 };
236 // SAFETY: self.vm is a valid KVM fd
237 let ret = unsafe { ioctl_with_ref(&self.vm, KVM_ARM_SET_COUNTER_OFFSET, &off) };
238 if ret != 0 {
239 return errno_result();
240 }
241 Ok(())
242 }
243 }
244
245 impl KvmVcpu {
246 /// Handles a `KVM_EXIT_SYSTEM_EVENT` with event type `KVM_SYSTEM_EVENT_RESET` with the given
247 /// event flags and returns the appropriate `VcpuExit` value for the run loop to handle.
248 ///
249 /// `event_flags` should be one or more of the `KVM_SYSTEM_EVENT_RESET_FLAG_*` values defined by
250 /// KVM.
system_event_reset(&self, event_flags: u64) -> Result<VcpuExit>251 pub fn system_event_reset(&self, event_flags: u64) -> Result<VcpuExit> {
252 if event_flags & u64::from(KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2) != 0 {
253 // Read reset_type and cookie from x1 and x2.
254 let reset_type = self.get_one_reg(VcpuRegAArch64::X(1))?;
255 let cookie = self.get_one_reg(VcpuRegAArch64::X(2))?;
256 warn!(
257 "PSCI SYSTEM_RESET2 with reset_type={:#x}, cookie={:#x}",
258 reset_type, cookie
259 );
260 }
261 Ok(VcpuExit::SystemEventReset)
262 }
263
kvm_reg_id(&self, reg: VcpuRegAArch64) -> Result<KvmVcpuRegister>264 fn kvm_reg_id(&self, reg: VcpuRegAArch64) -> Result<KvmVcpuRegister> {
265 match reg {
266 VcpuRegAArch64::X(n @ 0..=30) => Ok(KvmVcpuRegister::X(n)),
267 VcpuRegAArch64::Sp => Ok(KvmVcpuRegister::Sp),
268 VcpuRegAArch64::Pc => Ok(KvmVcpuRegister::Pc),
269 VcpuRegAArch64::Pstate => Ok(KvmVcpuRegister::Pstate),
270 // Special case for multiplexed KVM registers
271 VcpuRegAArch64::System(AArch64SysRegId::CCSIDR_EL1) => {
272 let csselr =
273 self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::CSSELR_EL1))?;
274 Ok(KvmVcpuRegister::Ccsidr(csselr as u8))
275 }
276 VcpuRegAArch64::System(sysreg) => Ok(KvmVcpuRegister::System(sysreg)),
277 _ => Err(Error::new(EINVAL)),
278 }
279 }
280
set_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister, data: u32) -> Result<()>281 fn set_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister, data: u32) -> Result<()> {
282 self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
283 }
284
set_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister, data: u64) -> Result<()>285 fn set_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister, data: u64) -> Result<()> {
286 self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
287 }
288
set_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister, data: u128) -> Result<()>289 fn set_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister, data: u128) -> Result<()> {
290 self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
291 }
292
set_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &[u8]) -> Result<()>293 fn set_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &[u8]) -> Result<()> {
294 assert_eq!(kvm_reg_id.size(), data.len());
295 let id: u64 = kvm_reg_id.into();
296 let onereg = kvm_one_reg {
297 id,
298 addr: (data.as_ptr() as usize)
299 .try_into()
300 .expect("can't represent usize as u64"),
301 };
302 // SAFETY:
303 // Safe because we allocated the struct and we know the kernel will read exactly the size of
304 // the struct.
305 let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG, &onereg) };
306 if ret == 0 {
307 Ok(())
308 } else {
309 errno_result()
310 }
311 }
312
get_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u32>313 fn get_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u32> {
314 let mut bytes = 0u32.to_ne_bytes();
315 self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
316 Ok(u32::from_ne_bytes(bytes))
317 }
318
get_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u64>319 fn get_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u64> {
320 let mut bytes = 0u64.to_ne_bytes();
321 self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
322 Ok(u64::from_ne_bytes(bytes))
323 }
324
get_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u128>325 fn get_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u128> {
326 let mut bytes = 0u128.to_ne_bytes();
327 self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
328 Ok(u128::from_ne_bytes(bytes))
329 }
330
get_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &mut [u8]) -> Result<()>331 fn get_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &mut [u8]) -> Result<()> {
332 assert_eq!(kvm_reg_id.size(), data.len());
333 let id: u64 = kvm_reg_id.into();
334 let onereg = kvm_one_reg {
335 id,
336 addr: (data.as_mut_ptr() as usize)
337 .try_into()
338 .expect("can't represent usize as u64"),
339 };
340
341 // SAFETY:
342 // Safe because we allocated the struct and we know the kernel will read exactly the size of
343 // the struct.
344 let ret = unsafe { ioctl_with_ref(self, KVM_GET_ONE_REG, &onereg) };
345 if ret == 0 {
346 Ok(())
347 } else {
348 errno_result()
349 }
350 }
351
352 #[inline]
handle_vm_exit_arch(&self, _run: &mut kvm_run) -> Option<VcpuExit>353 pub(crate) fn handle_vm_exit_arch(&self, _run: &mut kvm_run) -> Option<VcpuExit> {
354 // No aarch64-specific exits (for now)
355 None
356 }
357
get_reg_list(&self) -> Result<Vec<u64>>358 fn get_reg_list(&self) -> Result<Vec<u64>> {
359 let mut kvm_reg_list = vec_with_array_field::<kvm_reg_list, u64>(AARCH64_MAX_REG_COUNT);
360 kvm_reg_list[0].n = AARCH64_MAX_REG_COUNT as u64;
361 let ret =
362 // SAFETY:
363 // We trust the kernel not to read/write past the end of kvm_reg_list struct.
364 unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST, &mut kvm_reg_list[0]) };
365 if ret < 0 {
366 return errno_result();
367 }
368 let n = kvm_reg_list[0].n;
369 assert!(
370 n <= AARCH64_MAX_REG_COUNT as u64,
371 "Get reg list returned more registers than possible"
372 );
373 // SAFETY:
374 // Mapping the unsized array to a slice is unsafe because the length isn't known.
375 // Providing the length used to create the struct guarantees the entire slice is valid.
376 let reg_list: &[u64] = unsafe { kvm_reg_list[0].reg.as_slice(n as usize) };
377 Ok(reg_list.to_vec())
378 }
379
get_features_bitmap(&self, features: &[VcpuFeature]) -> Result<u32>380 fn get_features_bitmap(&self, features: &[VcpuFeature]) -> Result<u32> {
381 let mut all_features = 0;
382 let check_extension = |ext: u32| -> bool {
383 // SAFETY:
384 // Safe because we know self.vm is a real kvm fd
385 unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION, ext.into()) == 1 }
386 };
387
388 for f in features {
389 let shift = match f {
390 VcpuFeature::PsciV0_2 => KVM_ARM_VCPU_PSCI_0_2,
391 VcpuFeature::PmuV3 => KVM_ARM_VCPU_PMU_V3,
392 VcpuFeature::PowerOff => KVM_ARM_VCPU_POWER_OFF,
393 VcpuFeature::Sve => {
394 if !check_extension(KVM_CAP_ARM_SVE) {
395 return Err(Error::new(ENOTSUP));
396 }
397 KVM_ARM_VCPU_SVE
398 }
399 };
400 all_features |= 1 << shift;
401 }
402
403 if check_extension(KVM_CAP_ARM_PTRAUTH_ADDRESS)
404 && check_extension(KVM_CAP_ARM_PTRAUTH_GENERIC)
405 {
406 all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS;
407 all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC;
408 }
409
410 Ok(all_features)
411 }
412
413 /// Finalize VCPU features setup. This does not affect features that do not make use of
414 /// finalize.
finalize(&self, features: u32) -> Result<()>415 fn finalize(&self, features: u32) -> Result<()> {
416 if (features & 1 << KVM_ARM_VCPU_SVE) != 0 {
417 // SAFETY:
418 // Safe because we know that our file is a Vcpu fd and we verify the return result.
419 let ret = unsafe {
420 ioctl_with_ref(
421 self,
422 KVM_ARM_VCPU_FINALIZE,
423 &std::os::raw::c_int::try_from(KVM_ARM_VCPU_SVE)
424 .map_err(|_| Error::new(EINVAL))?,
425 )
426 };
427 if ret != 0 {
428 return errno_result();
429 }
430 }
431
432 Ok(())
433 }
434 }
435
/// KVM registers as used by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API
///
/// These variants represent the registers as exposed by KVM which must be different from
/// `VcpuRegAArch64` to support registers which don't have an architectural definition such as
/// pseudo-registers (`Firmware`) and multiplexed registers (`Ccsidr`).
///
/// Converting a value to `u64` yields the register ID passed to those ioctls.
///
/// See https://docs.kernel.org/virt/kvm/api.html for more details.
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub enum KvmVcpuRegister {
    /// General Purpose Registers X0-X30
    ///
    /// The payload is the register index; converting an index above 30 to `u64` panics.
    X(u8),
    /// Stack Pointer
    Sp,
    /// Program Counter
    Pc,
    /// Processor State
    Pstate,
    /// FP & SIMD Registers V0-V31
    ///
    /// The payload is the register index; converting an index above 31 to `u64` panics.
    V(u8),
    /// KVM Firmware Pseudo-Registers
    ///
    /// The payload is the pseudo-register number (e.g. `PSCI_VERSION` is number 0).
    Firmware(u16),
    /// System Registers
    System(AArch64SysRegId),
    /// CCSIDR_EL1 Demultiplexed by CSSELR_EL1
    ///
    /// The payload is the selector value, taken from the low 8 bits of CSSELR_EL1.
    Ccsidr(u8),
}
462
463 impl KvmVcpuRegister {
464 // Firmware pseudo-registers are part of the ARM KVM interface:
465 // https://docs.kernel.org/virt/kvm/arm/hypercalls.html
466 pub const PSCI_VERSION: Self = Self::Firmware(0);
467 pub const SMCCC_ARCH_WORKAROUND_1: Self = Self::Firmware(1);
468 pub const SMCCC_ARCH_WORKAROUND_2: Self = Self::Firmware(2);
469 pub const SMCCC_ARCH_WORKAROUND_3: Self = Self::Firmware(3);
470
471 /// Size of this register in bytes.
size(&self) -> usize472 pub fn size(&self) -> usize {
473 let kvm_reg = u64::from(*self);
474 let size_field = kvm_reg & KVM_REG_SIZE_MASK;
475 const REG_SIZE_U8: u64 = KVM_REG_SIZE_U8 as u64; // cast from bindgen's u32 to u64
476 match size_field {
477 REG_SIZE_U8 => 1,
478 KVM_REG_SIZE_U16 => 2,
479 KVM_REG_SIZE_U32 => 4,
480 KVM_REG_SIZE_U64 => 8,
481 KVM_REG_SIZE_U128 => 16,
482 KVM_REG_SIZE_U256 => 32,
483 KVM_REG_SIZE_U512 => 64,
484 KVM_REG_SIZE_U1024 => 128,
485 KVM_REG_SIZE_U2048 => 256,
486 // `From<KvmVcpuRegister> for u64` should always include a valid size.
487 _ => panic!("invalid size field {}", size_field),
488 }
489 }
490 }
491
492 /// Gives the `u64` register ID expected by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API.
493 ///
494 /// See the KVM documentation of those ioctls for details about the format of the register ID.
495 impl From<KvmVcpuRegister> for u64 {
from(register: KvmVcpuRegister) -> Self496 fn from(register: KvmVcpuRegister) -> Self {
497 const fn reg(size: u64, kind: u64, fields: u64) -> u64 {
498 KVM_REG_ARM64 | size | kind | fields
499 }
500
501 const fn kvm_regs_reg(size: u64, offset: usize) -> u64 {
502 let offset = offset / std::mem::size_of::<u32>();
503
504 reg(size, KVM_REG_ARM_CORE as u64, offset as u64)
505 }
506
507 const fn kvm_reg(offset: usize) -> u64 {
508 kvm_regs_reg(KVM_REG_SIZE_U64, offset)
509 }
510
511 fn spsr_reg(spsr_reg: u32) -> u64 {
512 let n = std::mem::size_of::<u64>() * (spsr_reg as usize);
513 kvm_reg(offset_of!(kvm_regs, spsr) + n)
514 }
515
516 fn user_pt_reg(offset: usize) -> u64 {
517 kvm_regs_reg(KVM_REG_SIZE_U64, offset_of!(kvm_regs, regs) + offset)
518 }
519
520 fn user_fpsimd_state_reg(size: u64, offset: usize) -> u64 {
521 kvm_regs_reg(size, offset_of!(kvm_regs, fp_regs) + offset)
522 }
523
524 const fn reg_u64(kind: u64, fields: u64) -> u64 {
525 reg(KVM_REG_SIZE_U64, kind, fields)
526 }
527
528 const fn demux_reg(size: u64, index: u64, value: u64) -> u64 {
529 let index = (index << KVM_REG_ARM_DEMUX_ID_SHIFT) & (KVM_REG_ARM_DEMUX_ID_MASK as u64);
530 let value =
531 (value << KVM_REG_ARM_DEMUX_VAL_SHIFT) & (KVM_REG_ARM_DEMUX_VAL_MASK as u64);
532
533 reg(size, KVM_REG_ARM_DEMUX as u64, index | value)
534 }
535
536 match register {
537 KvmVcpuRegister::X(n @ 0..=30) => {
538 let n = std::mem::size_of::<u64>() * (n as usize);
539
540 user_pt_reg(offset_of!(user_pt_regs, regs) + n)
541 }
542 KvmVcpuRegister::X(n) => unreachable!("invalid KvmVcpuRegister Xn index: {n}"),
543 KvmVcpuRegister::Sp => user_pt_reg(offset_of!(user_pt_regs, sp)),
544 KvmVcpuRegister::Pc => user_pt_reg(offset_of!(user_pt_regs, pc)),
545 KvmVcpuRegister::Pstate => user_pt_reg(offset_of!(user_pt_regs, pstate)),
546 KvmVcpuRegister::V(n @ 0..=31) => {
547 let n = std::mem::size_of::<u128>() * (n as usize);
548
549 user_fpsimd_state_reg(KVM_REG_SIZE_U128, offset_of!(user_fpsimd_state, vregs) + n)
550 }
551 KvmVcpuRegister::V(n) => unreachable!("invalid KvmVcpuRegister Vn index: {n}"),
552 KvmVcpuRegister::System(AArch64SysRegId::FPSR) => {
553 user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpsr))
554 }
555 KvmVcpuRegister::System(AArch64SysRegId::FPCR) => {
556 user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpcr))
557 }
558 KvmVcpuRegister::System(AArch64SysRegId::SPSR_EL1) => spsr_reg(KVM_SPSR_EL1),
559 KvmVcpuRegister::System(AArch64SysRegId::SPSR_abt) => spsr_reg(KVM_SPSR_ABT),
560 KvmVcpuRegister::System(AArch64SysRegId::SPSR_und) => spsr_reg(KVM_SPSR_UND),
561 KvmVcpuRegister::System(AArch64SysRegId::SPSR_irq) => spsr_reg(KVM_SPSR_IRQ),
562 KvmVcpuRegister::System(AArch64SysRegId::SPSR_fiq) => spsr_reg(KVM_SPSR_FIQ),
563 KvmVcpuRegister::System(AArch64SysRegId::SP_EL1) => {
564 kvm_reg(offset_of!(kvm_regs, sp_el1))
565 }
566 KvmVcpuRegister::System(AArch64SysRegId::ELR_EL1) => {
567 kvm_reg(offset_of!(kvm_regs, elr_el1))
568 }
569 // The KVM API accidentally swapped CNTV_CVAL_EL0 and CNTVCT_EL0.
570 KvmVcpuRegister::System(AArch64SysRegId::CNTV_CVAL_EL0) => reg_u64(
571 KVM_REG_ARM64_SYSREG.into(),
572 AArch64SysRegId::CNTVCT_EL0.encoded().into(),
573 ),
574 KvmVcpuRegister::System(AArch64SysRegId::CNTVCT_EL0) => reg_u64(
575 KVM_REG_ARM64_SYSREG.into(),
576 AArch64SysRegId::CNTV_CVAL_EL0.encoded().into(),
577 ),
578 KvmVcpuRegister::System(sysreg) => {
579 reg_u64(KVM_REG_ARM64_SYSREG.into(), sysreg.encoded().into())
580 }
581 KvmVcpuRegister::Firmware(n) => reg_u64(KVM_REG_ARM_FW.into(), n.into()),
582 KvmVcpuRegister::Ccsidr(n) => demux_reg(KVM_REG_SIZE_U32, 0, n.into()),
583 }
584 }
585 }
586
587 impl VcpuAArch64 for KvmVcpu {
init(&self, features: &[VcpuFeature]) -> Result<()>588 fn init(&self, features: &[VcpuFeature]) -> Result<()> {
589 let mut kvi = kvm_vcpu_init {
590 target: KVM_ARM_TARGET_GENERIC_V8,
591 features: [0; 7],
592 };
593 // SAFETY:
594 // Safe because we allocated the struct and we know the kernel will write exactly the size
595 // of the struct.
596 let ret = unsafe { ioctl_with_mut_ref(&self.vm, KVM_ARM_PREFERRED_TARGET, &mut kvi) };
597 if ret != 0 {
598 return errno_result();
599 }
600
601 kvi.features[0] = self.get_features_bitmap(features)?;
602 // SAFETY:
603 // Safe because we allocated the struct and we know the kernel will read exactly the size of
604 // the struct.
605 let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT, &kvi) };
606 if ret != 0 {
607 return errno_result();
608 }
609
610 self.finalize(kvi.features[0])?;
611 Ok(())
612 }
613
init_pmu(&self, irq: u64) -> Result<()>614 fn init_pmu(&self, irq: u64) -> Result<()> {
615 let irq_addr = &irq as *const u64;
616
617 // The in-kernel PMU virtualization is initialized by setting the irq
618 // with KVM_ARM_VCPU_PMU_V3_IRQ and then by KVM_ARM_VCPU_PMU_V3_INIT.
619
620 let irq_attr = kvm_device_attr {
621 group: KVM_ARM_VCPU_PMU_V3_CTRL,
622 attr: KVM_ARM_VCPU_PMU_V3_IRQ as u64,
623 addr: irq_addr as u64,
624 flags: 0,
625 };
626 // SAFETY:
627 // Safe because we allocated the struct and we know the kernel will read exactly the size of
628 // the struct.
629 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &irq_attr) };
630 if ret < 0 {
631 return errno_result();
632 }
633
634 // SAFETY:
635 // Safe because we allocated the struct and we know the kernel will read exactly the size of
636 // the struct.
637 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &irq_attr) };
638 if ret < 0 {
639 return errno_result();
640 }
641
642 let init_attr = kvm_device_attr {
643 group: KVM_ARM_VCPU_PMU_V3_CTRL,
644 attr: KVM_ARM_VCPU_PMU_V3_INIT as u64,
645 addr: 0,
646 flags: 0,
647 };
648 // SAFETY:
649 // Safe because we allocated the struct and we know the kernel will read exactly the size of
650 // the struct.
651 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &init_attr) };
652 if ret < 0 {
653 return errno_result();
654 }
655
656 Ok(())
657 }
658
has_pvtime_support(&self) -> bool659 fn has_pvtime_support(&self) -> bool {
660 // The in-kernel PV time structure is initialized by setting the base
661 // address with KVM_ARM_VCPU_PVTIME_IPA
662 let pvtime_attr = kvm_device_attr {
663 group: KVM_ARM_VCPU_PVTIME_CTRL,
664 attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
665 addr: 0,
666 flags: 0,
667 };
668 // SAFETY:
669 // Safe because we allocated the struct and we know the kernel will read exactly the size of
670 // the struct.
671 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &pvtime_attr) };
672 ret >= 0
673 }
674
init_pvtime(&self, pvtime_ipa: u64) -> Result<()>675 fn init_pvtime(&self, pvtime_ipa: u64) -> Result<()> {
676 let pvtime_ipa_addr = &pvtime_ipa as *const u64;
677
678 // The in-kernel PV time structure is initialized by setting the base
679 // address with KVM_ARM_VCPU_PVTIME_IPA
680 let pvtime_attr = kvm_device_attr {
681 group: KVM_ARM_VCPU_PVTIME_CTRL,
682 attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
683 addr: pvtime_ipa_addr as u64,
684 flags: 0,
685 };
686
687 // SAFETY:
688 // Safe because we allocated the struct and we know the kernel will read exactly the size of
689 // the struct.
690 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &pvtime_attr) };
691 if ret < 0 {
692 return errno_result();
693 }
694
695 Ok(())
696 }
697
set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()>698 fn set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()> {
699 let kvm_reg = self.kvm_reg_id(reg_id)?;
700 match kvm_reg.size() {
701 4 => self.set_one_kvm_reg_u32(kvm_reg, data as u32),
702 8 => self.set_one_kvm_reg_u64(kvm_reg, data),
703 size => panic!("bad reg size {size}"),
704 }
705 }
706
get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64>707 fn get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64> {
708 let kvm_reg = self.kvm_reg_id(reg_id)?;
709 match kvm_reg.size() {
710 4 => self.get_one_kvm_reg_u32(kvm_reg).map(u64::from),
711 8 => self.get_one_kvm_reg_u64(kvm_reg),
712 size => panic!("bad reg size {size}"),
713 }
714 }
715
set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()>716 fn set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()> {
717 if reg_num > 31 {
718 return Err(Error::new(EINVAL));
719 }
720 self.set_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num), data)
721 }
722
get_vector_reg(&self, reg_num: u8) -> Result<u128>723 fn get_vector_reg(&self, reg_num: u8) -> Result<u128> {
724 if reg_num > 31 {
725 return Err(Error::new(EINVAL));
726 }
727 self.get_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num))
728 }
729
get_mpidr(&self) -> Result<u64>730 fn get_mpidr(&self) -> Result<u64> {
731 self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::MPIDR_EL1))
732 }
733
get_psci_version(&self) -> Result<PsciVersion>734 fn get_psci_version(&self) -> Result<PsciVersion> {
735 let version = if let Ok(v) = self.get_one_kvm_reg_u64(KvmVcpuRegister::PSCI_VERSION) {
736 let v = u32::try_from(v).map_err(|_| Error::new(EINVAL))?;
737 PsciVersion::try_from(v)?
738 } else {
739 // When `KVM_REG_ARM_PSCI_VERSION` is not supported, we can return PSCI 0.2, as vCPU
740 // has been initialized with `KVM_ARM_VCPU_PSCI_0_2` successfully.
741 PSCI_0_2
742 };
743
744 if version < PSCI_0_2 {
745 // PSCI v0.1 isn't currently supported for guests
746 Err(Error::new(ENOTSUP))
747 } else {
748 Ok(version)
749 }
750 }
751
get_max_hw_bps(&self) -> Result<usize>752 fn get_max_hw_bps(&self) -> Result<usize> {
753 // SAFETY:
754 // Safe because the kernel will only return the result of the ioctl.
755 let max_hw_bps = unsafe {
756 ioctl_with_val(
757 &self.vm,
758 KVM_CHECK_EXTENSION,
759 KVM_CAP_GUEST_DEBUG_HW_BPS.into(),
760 )
761 };
762
763 if max_hw_bps < 0 {
764 errno_result()
765 } else {
766 Ok(max_hw_bps.try_into().expect("can't represent u64 as usize"))
767 }
768 }
769
get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>>770 fn get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>> {
771 let reg_list = self.get_reg_list()?;
772 let cntvct_el0: u16 = AArch64SysRegId::CNTVCT_EL0.encoded();
773 let cntv_cval_el0: u16 = AArch64SysRegId::CNTV_CVAL_EL0.encoded();
774 let mut sys_regs = BTreeMap::new();
775 for reg in reg_list {
776 if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM64_SYSREG {
777 let r = if reg as u16 == cntvct_el0 {
778 AArch64SysRegId::CNTV_CVAL_EL0
779 } else if reg as u16 == cntv_cval_el0 {
780 AArch64SysRegId::CNTVCT_EL0
781 } else {
782 AArch64SysRegId::from_encoded((reg & 0xFFFF) as u16)
783 };
784 sys_regs.insert(r, self.get_one_reg(VcpuRegAArch64::System(r))?);
785 // The register representations are tricky. Double check they round trip correctly.
786 assert_eq!(
787 Ok(reg),
788 self.kvm_reg_id(VcpuRegAArch64::System(r)).map(u64::from),
789 );
790 }
791 }
792 Ok(sys_regs)
793 }
794
get_cache_info(&self) -> Result<BTreeMap<u8, u64>>795 fn get_cache_info(&self) -> Result<BTreeMap<u8, u64>> {
796 const KVM_REG_CCSIDR: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | (KVM_REG_ARM_DEMUX as u64);
797 const CCSIDR_INDEX_MASK: u64 = 0xFF;
798 let reg_list = self.get_reg_list()?;
799 let mut cache_info = BTreeMap::new();
800 for reg in reg_list {
801 if (reg & !CCSIDR_INDEX_MASK) == KVM_REG_CCSIDR {
802 let idx = reg as u8;
803 cache_info.insert(
804 idx,
805 self.get_one_kvm_reg_u32(KvmVcpuRegister::Ccsidr(idx))?
806 .into(),
807 );
808 }
809 }
810 Ok(cache_info)
811 }
812
set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()>813 fn set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()> {
814 for (idx, val) in cache_info {
815 self.set_one_kvm_reg_u32(
816 KvmVcpuRegister::Ccsidr(idx),
817 val.try_into()
818 .expect("trying to set a u32 register with a u64 value"),
819 )?;
820 }
821 Ok(())
822 }
823
hypervisor_specific_snapshot(&self) -> anyhow::Result<AnySnapshot>824 fn hypervisor_specific_snapshot(&self) -> anyhow::Result<AnySnapshot> {
825 let reg_list = self.get_reg_list()?;
826 let mut firmware_regs = BTreeMap::new();
827 for reg in reg_list {
828 if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM_FW {
829 firmware_regs.insert(
830 reg as u16,
831 self.get_one_kvm_reg_u64(KvmVcpuRegister::Firmware(reg as u16))?,
832 );
833 }
834 }
835
836 AnySnapshot::to_any(KvmSnapshot { firmware_regs })
837 .context("Failed to serialize KVM specific data")
838 }
839
hypervisor_specific_restore(&self, data: AnySnapshot) -> anyhow::Result<()>840 fn hypervisor_specific_restore(&self, data: AnySnapshot) -> anyhow::Result<()> {
841 let deser: KvmSnapshot =
842 AnySnapshot::from_any(data).context("Failed to deserialize KVM specific data")?;
843 // TODO: need to set firmware registers before "create_fdt" is called, earlier in the
844 // stack.
845 for (id, val) in &deser.firmware_regs {
846 self.set_one_kvm_reg_u64(KvmVcpuRegister::Firmware(*id), *val)?;
847 }
848 Ok(())
849 }
850
851 #[allow(clippy::unusual_byte_groupings)]
set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>852 fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()> {
853 let mut dbg = kvm_guest_debug {
854 control: KVM_GUESTDBG_ENABLE,
855 ..Default::default()
856 };
857
858 if enable_singlestep {
859 dbg.control |= KVM_GUESTDBG_SINGLESTEP;
860 }
861 if !addrs.is_empty() {
862 dbg.control |= KVM_GUESTDBG_USE_HW;
863 }
864
865 for (i, guest_addr) in addrs.iter().enumerate() {
866 // From the ARMv8 Architecture Reference Manual (DDI0487H.a) D31.3.{2,3}:
867 // When DBGBCR<n>_EL1.BT == 0b000x:
868 // DBGBVR<n>_EL1, Bits [1:0]: Reserved, RES0
869 if guest_addr.0 & 0b11 != 0 {
870 return Err(Error::new(EINVAL));
871 }
872 let sign_ext = 15;
873 // DBGBVR<n>_EL1.RESS[14:0], bits [63:49]: Reserved, Sign extended
874 dbg.arch.dbg_bvr[i] = (((guest_addr.0 << sign_ext) as i64) >> sign_ext) as u64;
875 // DBGBCR<n>_EL1.BT, bits [23:20]: Breakpoint Type
876 // 0b0000: Unlinked instruction address match.
877 // DBGBVR<n>_EL1 is the address of an instruction.
878 // DBGBCR<n>_EL1.BAS, bits [8:5]: Byte address select
879 // 0b1111: Use for A64 and A32 instructions
880 // DBGBCR<n>_EL1.PMC, bits [2:1]: Privilege mode control
881 // 0b11: EL1 & EL0
882 // DBGBCR<n>_EL1.E, bit [0]: Enable breakpoint
883 // 0b1: Enabled
884 dbg.arch.dbg_bcr[i] = 0b1111_11_1;
885 }
886
887 // SAFETY:
888 // Safe because the kernel won't read past the end of the kvm_guest_debug struct.
889 let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG, &dbg) };
890 if ret == 0 {
891 Ok(())
892 } else {
893 errno_result()
894 }
895 }
896 }
897
/// KVM-specific vCPU state saved by `hypervisor_specific_snapshot` and applied again by
/// `hypervisor_specific_restore`.
#[derive(Debug, Serialize, Deserialize)]
struct KvmSnapshot {
    /// Values of the KVM firmware pseudo-registers, keyed by the number used with
    /// `KvmVcpuRegister::Firmware`.
    firmware_regs: BTreeMap<u16, u64>,
}
902
903 // This function translates an IrqSrouceChip to the kvm u32 equivalent. It has a different
904 // implementation between x86_64 and aarch64 because the irqchip KVM constants are not defined on
905 // all architectures.
chip_to_kvm_chip(chip: IrqSourceChip) -> u32906 pub(super) fn chip_to_kvm_chip(chip: IrqSourceChip) -> u32 {
907 match chip {
908 // ARM does not have a constant for this, but the default routing
909 // setup seems to set this to 0
910 IrqSourceChip::Gic => 0,
911 _ => {
912 error!("Invalid IrqChipSource for ARM {:?}", chip);
913 0
914 }
915 }
916 }
917
#[cfg(test)]
mod tests {
    use super::*;

    /// Per https://docs.kernel.org/virt/kvm/api.html ARM64 system register encoding docs,
    /// KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT were accidentally defined backwards.
    /// Ensure the AArch64SysRegId to KvmVcpuRegister encoding maps these to the expected
    /// values.
    #[test]
    fn system_timer_register_mixup() {
        const KVM_REG_ARM_TIMER_CVAL: u64 = 0x6030_0000_0013_DF02;
        const KVM_REG_ARM_TIMER_CNT: u64 = 0x6030_0000_0013_DF1A;

        let expectations = [
            (AArch64SysRegId::CNTV_CVAL_EL0, KVM_REG_ARM_TIMER_CVAL),
            (AArch64SysRegId::CNTVCT_EL0, KVM_REG_ARM_TIMER_CNT),
        ];
        for (sysreg, expected_id) in expectations {
            let kvm_reg = KvmVcpuRegister::System(sysreg);
            assert_eq!(u64::from(kvm_reg), expected_id);
        }
    }
}
938