1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::HashMap;
6 use std::ffi::CString;
7 use std::fs::File;
8 use std::fs::OpenOptions;
9 use std::io;
10 use std::mem;
11 use std::os::raw::c_ulong;
12 use std::os::unix::prelude::FileExt;
13 use std::path::Path;
14 use std::path::PathBuf;
15 #[cfg(all(target_os = "android", target_arch = "aarch64"))]
16 use std::ptr::addr_of_mut;
17 use std::slice;
18 use std::sync::Arc;
19
20 use base::error;
21 use base::ioctl;
22 use base::ioctl_with_mut_ptr;
23 use base::ioctl_with_mut_ref;
24 use base::ioctl_with_ptr;
25 use base::ioctl_with_ref;
26 use base::ioctl_with_val;
27 use base::warn;
28 use base::AsRawDescriptor;
29 use base::Error;
30 use base::Event;
31 use base::FromRawDescriptor;
32 use base::RawDescriptor;
33 use base::SafeDescriptor;
34 use cfg_if::cfg_if;
35 use data_model::vec_with_array_field;
36 use hypervisor::DeviceKind;
37 use hypervisor::Vm;
38 use once_cell::sync::OnceCell;
39 use rand::seq::index::sample;
40 use rand::thread_rng;
41 use remain::sorted;
42 use resources::address_allocator::AddressAllocator;
43 use resources::AddressRange;
44 use resources::Alloc;
45 use resources::Error as ResourcesError;
46 use sync::Mutex;
47 use thiserror::Error;
48 use vfio_sys::vfio::vfio_acpi_dsm;
49 use vfio_sys::vfio::VFIO_IRQ_SET_DATA_BOOL;
50 use vfio_sys::*;
51 use zerocopy::FromBytes;
52 use zerocopy::Immutable;
53 use zerocopy::IntoBytes;
54
55 use crate::IommuDevType;
56
57 #[sorted]
58 #[derive(Error, Debug)]
59 pub enum VfioError {
60 #[error("failed to duplicate VfioContainer")]
61 ContainerDupError,
62 #[error("failed to set container's IOMMU driver type as {0:?}: {1}")]
63 ContainerSetIOMMU(IommuType, Error),
64 #[error("failed to create KVM vfio device: {0}")]
65 CreateVfioKvmDevice(Error),
66 #[error("failed to get Group Status: {0}")]
67 GetGroupStatus(Error),
68 #[error("failed to get vfio device fd: {0}")]
69 GroupGetDeviceFD(Error),
70 #[error("failed to add vfio group into vfio container: {0}")]
71 GroupSetContainer(Error),
72 #[error("group is inviable")]
73 GroupViable,
74 #[error("invalid region index: {0}")]
75 InvalidIndex(usize),
76 #[error("invalid operation")]
77 InvalidOperation,
78 #[error("invalid file path")]
79 InvalidPath,
80 #[error("failed to add guest memory map into iommu table: {0}")]
81 IommuDmaMap(Error),
82 #[error("failed to remove guest memory map from iommu table: {0}")]
83 IommuDmaUnmap(Error),
84 #[error("failed to get IOMMU cap info from host")]
85 IommuGetCapInfo,
86 #[error("failed to get IOMMU info from host: {0}")]
87 IommuGetInfo(Error),
88 #[error("failed to attach device to pKVM pvIOMMU: {0}")]
89 KvmPviommuSetConfig(Error),
90 #[error("failed to set KVM vfio device's attribute: {0}")]
91 KvmSetDeviceAttr(Error),
92 #[error("AddressAllocator is unavailable")]
93 NoRescAlloc,
94 #[error("failed to open /dev/vfio/vfio container: {0}")]
95 OpenContainer(io::Error),
96 #[error("failed to open {1} group: {0}")]
97 OpenGroup(io::Error, String),
98 #[error("failed to read {1} link: {0}")]
99 ReadLink(io::Error, PathBuf),
100 #[error("resources error: {0}")]
101 Resources(ResourcesError),
102 #[error("unknown vfio device type (flags: {0:#x})")]
103 UnknownDeviceType(u32),
104 #[error("failed to call vfio device's ACPI _DSM: {0}")]
105 VfioAcpiDsm(Error),
106 #[error("failed to disable vfio deviece's acpi notification: {0}")]
107 VfioAcpiNotificationDisable(Error),
108 #[error("failed to enable vfio deviece's acpi notification: {0}")]
109 VfioAcpiNotificationEnable(Error),
110 #[error("failed to test vfio deviece's acpi notification: {0}")]
111 VfioAcpiNotificationTest(Error),
112 #[error(
113 "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
114 )]
115 VfioApiVersion,
116 #[error("failed to get vfio device's info or info doesn't match: {0}")]
117 VfioDeviceGetInfo(Error),
118 #[error("failed to get vfio device's region info: {0}")]
119 VfioDeviceGetRegionInfo(Error),
120 #[error("container doesn't support IOMMU driver type {0:?}")]
121 VfioIommuSupport(IommuType),
122 #[error("failed to disable vfio deviece's irq: {0}")]
123 VfioIrqDisable(Error),
124 #[error("failed to enable vfio deviece's irq: {0}")]
125 VfioIrqEnable(Error),
126 #[error("failed to mask vfio deviece's irq: {0}")]
127 VfioIrqMask(Error),
128 #[error("failed to unmask vfio deviece's irq: {0}")]
129 VfioIrqUnmask(Error),
130 #[error("failed to enter vfio deviece's low power state: {0}")]
131 VfioPmLowPowerEnter(Error),
132 #[error("failed to exit vfio deviece's low power state: {0}")]
133 VfioPmLowPowerExit(Error),
134 }
135
/// Result type used throughout this module.
type Result<T> = std::result::Result<T, VfioError>;

/// Returns the most recent OS error (errno) as a `base::Error`.
fn get_error() -> Error {
    Error::last()
}

// KVM VFIO pseudo device shared process-wide; created lazily on first use via
// `get_or_try_init` (see `get_group_with_vm` and the pvIOMMU ioctl helpers).
static KVM_VFIO_FILE: OnceCell<SafeDescriptor> = OnceCell::new();
143
/// Type of a VFIO device: a PCI endpoint or a platform (non-PCI) device.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VfioDeviceType {
    Pci,
    Platform,
}
149
// Operation applied to the KVM VFIO pseudo device's group attribute:
// add a VFIO group to the KVM device, or delete it (see `kvm_device_set_group`).
enum KvmVfioGroupOps {
    Add,
    Delete,
}
154
/// Handle to a pKVM pvIOMMU instance, backed by the file descriptor returned
/// by the KVM VFIO device's `KVM_DEV_VFIO_PVIOMMU_ATTACH` attribute.
#[derive(Debug)]
pub struct KvmVfioPviommu {
    file: File,
}
159
impl KvmVfioPviommu {
    /// Creates a new pvIOMMU by attaching to the VM's KVM VFIO pseudo device.
    ///
    /// Only implemented on aarch64 Android; panics (`unimplemented!`) elsewhere.
    pub fn new(vm: &impl Vm) -> Result<Self> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let file = Self::ioctl_kvm_dev_vfio_pviommu_attach(vm)?;

                Ok(Self { file })
            } else {
                let _ = vm;
                unimplemented!()
            }
        }
    }

    /// Routes the device's stream ID at index `sid_idx` to virtual stream ID
    /// `vsid` on this pvIOMMU.
    ///
    /// Only implemented on aarch64 Android; panics (`unimplemented!`) elsewhere.
    pub fn attach<T: AsRawDescriptor>(&self, device: &T, sid_idx: u32, vsid: u32) -> Result<()> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                self.ioctl_kvm_pviommu_set_config(device, sid_idx, vsid)
            } else {
                let _ = device;
                let _ = sid_idx;
                let _ = vsid;
                unimplemented!()
            }
        }
    }

    /// Returns the identifier of this pvIOMMU, which is its raw fd value.
    pub fn id(&self) -> u32 {
        let fd = self.as_raw_descriptor();
        // Guests identify pvIOMMUs to the hypervisor using the corresponding VMM FDs.
        fd.try_into().unwrap()
    }

    /// Queries how many stream IDs (SIDs) `device` exposes.
    ///
    /// Only implemented on aarch64 Android; panics (`unimplemented!`) elsewhere.
    pub fn get_sid_count<T: AsRawDescriptor>(vm: &impl Vm, device: &T) -> Result<u32> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let info = Self::ioctl_kvm_dev_vfio_pviommu_get_info(vm, device)?;

                Ok(info.nr_sids)
            } else {
                let _ = vm;
                let _ = device;
                unimplemented!()
            }
        }
    }

    // Issues KVM_DEV_VFIO_PVIOMMU_ATTACH on the (lazily created) KVM VFIO
    // device. On success the non-negative ioctl return value is a new
    // pvIOMMU fd, which we take ownership of.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_attach(vm: &impl Vm) -> Result<File> {
        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_ATTACH as u64,
            addr: 0,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            // SAFETY: Safe as we verify the return value.
            Ok(unsafe { File::from_raw_descriptor(ret) })
        }
    }

    // Issues KVM_PVIOMMU_SET_CONFIG on this pvIOMMU fd to map the device's
    // sid_idx-th stream ID to vsid.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_pviommu_set_config<T: AsRawDescriptor>(
        &self,
        device: &T,
        sid_idx: u32,
        vsid: u32,
    ) -> Result<()> {
        let config = kvm_sys::kvm_vfio_iommu_config {
            device_fd: device.as_raw_descriptor(),
            sid_idx,
            vsid,
        };

        // SAFETY:
        // Safe as we are the owner of device and config which are valid, and we verify the return
        // value.
        let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_PVIOMMU_SET_CONFIG, &config) };

        if ret < 0 {
            Err(VfioError::KvmPviommuSetConfig(get_error()))
        } else {
            Ok(())
        }
    }

    // Issues KVM_DEV_VFIO_PVIOMMU_GET_INFO on the (lazily created) KVM VFIO
    // device; the kernel fills `info` (notably `nr_sids`) for `device` via the
    // pointer passed in `addr`.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_get_info<T: AsRawDescriptor>(
        vm: &impl Vm,
        device: &T,
    ) -> Result<kvm_sys::kvm_vfio_iommu_info> {
        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;

        let mut info = kvm_sys::kvm_vfio_iommu_info {
            device_fd: device.as_raw_descriptor(),
            nr_sids: 0,
        };

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_GET_INFO as u64,
            addr: addr_of_mut!(info) as usize as u64,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            Ok(info)
        }
    }
}
291
impl AsRawDescriptor for KvmVfioPviommu {
    // Exposes the pvIOMMU fd so it can be used directly in ioctls.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.file.as_raw_descriptor()
    }
}
297
/// IOMMU driver types that can be requested from a VFIO container via
/// `VFIO_SET_IOMMU`. Discriminants are the kernel's extension IDs.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
    PkvmPviommu = VFIO_PKVM_PVIOMMU,
    // ChromeOS specific vfio_iommu_type1 implementation that is optimized for
    // small, dynamic mappings. For clients which create large, relatively
    // static mappings, Type1V2 is still preferred.
    //
    // See crrev.com/c/3593528 for the implementation.
    Type1ChromeOS = 100001,
}
310
/// A VFIO container: holds multiple `VfioGroup`s and delegates one IOMMU
/// domain table shared by all of them.
pub struct VfioContainer {
    // Underlying container file (typically opened from /dev/vfio/vfio).
    container: File,
    // Groups added to this container, keyed by IOMMU group id.
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
    // IOMMU driver selected via VFIO_SET_IOMMU; None until the first group is added.
    iommu_type: Option<IommuType>,
}
317
extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T> where T: FromBytes,318 fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T>
319 where
320 T: FromBytes,
321 {
322 Some(T::read_from_prefix(bytes.get(offset..)?).ok()?.0)
323 }
324
// VFIO API version this module expects from the kernel (VFIO_GET_API_VERSION).
const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
    /// Opens `/dev/vfio/vfio` and wraps it in a new, empty container.
    pub fn new() -> Result<Self> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        Self::new_from_container(container)
    }

    // Construct a VfioContainer from an existing container file, after
    // verifying that the kernel reports the expected VFIO API version.
    pub fn new_from_container(container: File) -> Result<Self> {
        // SAFETY:
        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
            iommu_type: None,
        })
    }

    // Returns true if the group with `group_id` has already been added here.
    fn is_group_set(&self, group_id: u32) -> bool {
        self.groups.contains_key(&group_id)
    }

    // Asks the kernel (VFIO_CHECK_EXTENSION) whether IOMMU driver `val` is supported.
    fn check_extension(&self, val: IommuType) -> bool {
        // SAFETY:
        // Safe as file is vfio container and make sure val is valid.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION, val as c_ulong) };
        ret != 0
    }

    // Issues VFIO_SET_IOMMU and returns the raw ioctl result (0 on success).
    fn set_iommu(&mut self, val: IommuType) -> i32 {
        // SAFETY:
        // Safe as file is vfio container and make sure val is valid.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU, val as c_ulong) }
    }

    // Selects IOMMU driver `val` after verifying kernel support, recording it
    // in self.iommu_type on success.
    fn set_iommu_checked(&mut self, val: IommuType) -> Result<()> {
        if !self.check_extension(val) {
            Err(VfioError::VfioIommuSupport(val))
        } else if self.set_iommu(val) != 0 {
            Err(VfioError::ContainerSetIOMMU(val, get_error()))
        } else {
            self.iommu_type = Some(val);
            Ok(())
        }
    }

    /// Maps `[iova, iova + size)` to host virtual address `user_addr` in this
    /// container's IOMMU table. Panics if no IOMMU has been configured yet.
    ///
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_map called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_map(iova, size, user_addr, write_en)
            }
            // The pvIOMMU driver has no type1-style DMA map operation.
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    /// Type1 implementation of [`Self::vfio_dma_map`].
    ///
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    unsafe fn vfio_iommu_type1_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        // Mappings are always readable; writability is opt-in.
        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA, &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    /// Removes the mapping for `[iova, iova + size)` from the IOMMU table.
    /// Panics if no IOMMU has been configured yet.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_unmap called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_unmap(iova, size)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    // Type1 implementation of vfio_dma_unmap. Fails if the kernel reports an
    // unmapped size different from the requested one.
    fn vfio_iommu_type1_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, dma_unmap is constructed by us, and
        // we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA, &mut dma_unmap) };
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

    /// Returns the bitmask of IOMMU-supported page sizes (0 for pvIOMMU).
    /// Panics if no IOMMU has been configured yet.
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        match self
            .iommu_type
            .expect("vfio_get_iommu_page_size_mask called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iommu_page_size_mask()
            }
            IommuType::PkvmPviommu => Ok(0),
        }
    }

    // Type1 implementation: reads iova_pgsizes from VFIO_IOMMU_GET_INFO.
    fn vfio_iommu_type1_get_iommu_page_size_mask(&self) -> Result<u64> {
        let mut iommu_info = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, iommu_info has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info) };
        if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        Ok(iommu_info.iova_pgsizes)
    }

    /// Returns the valid IOVA ranges of this container (empty for pvIOMMU).
    /// Panics if no IOMMU has been configured yet.
    pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        match self
            .iommu_type
            .expect("vfio_iommu_iova_get_iova_ranges called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iova_ranges()
            }
            IommuType::PkvmPviommu => Ok(Vec::new()),
        }
    }

    // Type1 implementation: walks the VFIO_IOMMU_GET_INFO capability chain
    // looking for the IOVA-range capability and converts it to AddressRanges.
    fn vfio_iommu_type1_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        // Query the buffer size needed to fetch the capabilities.
        let mut iommu_info_argsz = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, iommu_info_argsz has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info_argsz) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Re-issue the ioctl with a buffer big enough (argsz) to hold the
        // capability chain that follows the fixed-size header.
        let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
            iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
        );
        iommu_info[0].argsz = iommu_info_argsz.argsz;
        let ret =
            // SAFETY:
            // Safe as file is vfio container, iommu_info has valid values,
            // and we check the return value
            unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO, iommu_info.as_mut_ptr()) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        // SAFETY:
        // Safe because we initialized iommu_info with enough space, u8 has less strict
        // alignment, and since it will no longer be mutated.
        let info_bytes = unsafe {
            std::slice::from_raw_parts(
                iommu_info.as_ptr() as *const u8,
                iommu_info_argsz.argsz as usize,
            )
        };

        if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Walk the capability chain; each header's `next` is the byte offset
        // of the following capability (0 terminates the chain).
        let mut offset = iommu_info[0].cap_offset as usize;
        while offset != 0 {
            let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset)
                .ok_or(VfioError::IommuGetCapInfo)?;

            if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
                let iova_header =
                    extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
                        info_bytes, offset,
                    )
                    .ok_or(VfioError::IommuGetCapInfo)?;
                let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
                let mut ret = Vec::new();
                for i in 0..iova_header.nr_iovas {
                    ret.push(
                        extract_vfio_struct::<vfio_iova_range>(
                            info_bytes,
                            range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
                        )
                        .ok_or(VfioError::IommuGetCapInfo)?,
                    );
                }
                return Ok(ret
                    .iter()
                    .map(|range| AddressRange {
                        start: range.start,
                        end: range.end,
                    })
                    .collect());
            }
            offset = header.next as usize;
        }

        Err(VfioError::IommuGetCapInfo)
    }

    // Picks and sets the IOMMU driver appropriate for `iommu_dev`.
    fn set_iommu_from(&mut self, iommu_dev: IommuDevType) -> Result<()> {
        match iommu_dev {
            IommuDevType::CoIommu | IommuDevType::VirtioIommu => {
                // If we expect granular, dynamic mappings, try the ChromeOS Type1ChromeOS first,
                // then fall back to upstream versions.
                self.set_iommu_checked(IommuType::Type1ChromeOS)
                    .or_else(|_| self.set_iommu_checked(IommuType::Type1V2))
            }
            IommuDevType::NoIommu => self.set_iommu_checked(IommuType::Type1V2),
            IommuDevType::PkvmPviommu => self.set_iommu_checked(IommuType::PkvmPviommu),
        }
    }

    // Returns the group `id`, creating and registering it (and performing
    // first-group container initialization) if it isn't present yet.
    fn get_group_with_vm(
        &mut self,
        id: u32,
        vm: &impl Vm,
        iommu_dev: IommuDevType,
    ) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
        if self.groups.is_empty() {
            self.set_iommu_from(iommu_dev)?;
            // Before the first group is added into container, do once per container
            // initialization. Both coiommu and virtio-iommu rely on small, dynamic
            // mappings. However, if an iommu is not enabled, then we map the entirety
            // of guest memory as a small number of large, static mappings.
            match iommu_dev {
                IommuDevType::CoIommu | IommuDevType::PkvmPviommu | IommuDevType::VirtioIommu => {}
                IommuDevType::NoIommu => {
                    for region in vm.get_memory().regions() {
                        // SAFETY:
                        // Safe because the guest regions are guaranteed not to overlap
                        unsafe {
                            self.vfio_dma_map(
                                region.guest_addr.0,
                                region.size as u64,
                                region.host_addr as u64,
                                true,
                            )
                        }?;
                    }
                }
            }
        }

        // Register the new group with the (lazily created) KVM VFIO device.
        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;
        group
            .lock()
            .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;

        self.groups.insert(id, group.clone());

        Ok(group)
    }

    // Like get_group_with_vm, but without a VM: no guest-memory mapping and no
    // KVM VFIO device registration; always uses the Type1V2 driver.
    fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));

        if self.groups.is_empty() {
            // Before the first group is added into container, do once per
            // container initialization.
            self.set_iommu_checked(IommuType::Type1V2)?;
        }

        self.groups.insert(id, group.clone());
        Ok(group)
    }

    // Drops group `id` once its device count reaches zero, unregistering it
    // from the KVM VFIO device first. `reduce` decrements the count before
    // checking.
    fn remove_group(&mut self, id: u32, reduce: bool) {
        let mut remove = false;

        if let Some(group) = self.groups.get(&id) {
            if reduce {
                group.lock().reduce_device_num();
            }
            if group.lock().device_num() == 0 {
                let kvm_vfio_file = KVM_VFIO_FILE.get().expect("kvm vfio file isn't created");
                if group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
                    .is_err()
                {
                    warn!("failing in remove vfio group from kvm device");
                }
                remove = true;
            }
        }

        if remove {
            self.groups.remove(&id);
        }
    }

    /// Duplicates the container fd; the caller takes ownership of the result.
    pub fn clone_as_raw_descriptor(&self) -> Result<RawDescriptor> {
        // SAFETY: this call is safe because it doesn't modify any memory and we
        // check the return value.
        let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
        if raw_descriptor < 0 {
            Err(VfioError::ContainerDupError)
        } else {
            Ok(raw_descriptor)
        }
    }

    // Gets group ids for all groups in the container.
    pub fn group_ids(&self) -> Vec<&u32> {
        self.groups.keys().collect()
    }
}
710
impl AsRawDescriptor for VfioContainer {
    // Exposes the raw container fd so it can be used directly in ioctls.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}
716
// A VFIO group (opened from /dev/vfio/<id>) bound to a container.
struct VfioGroup {
    group: File,
    // Number of VfioDevices currently opened from this group; used to decide
    // when the group can be removed from its container.
    device_num: u32,
}
721
impl VfioGroup {
    // Opens /dev/vfio/<id>, checks that the kernel reports the group as
    // viable, and binds it to `container` via VFIO_GROUP_SET_CONTAINER.
    fn new(container: &VfioContainer, id: u32) -> Result<Self> {
        let group_path = format!("/dev/vfio/{}", id);
        let group_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(Path::new(&group_path))
            .map_err(|e| VfioError::OpenGroup(e, group_path))?;

        let mut group_status = vfio_group_status {
            argsz: mem::size_of::<vfio_group_status>() as u32,
            flags: 0,
        };
        let mut ret =
            // SAFETY:
            // Safe as we are the owner of group_file and group_status which are valid value.
            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS, &mut group_status) };
        if ret < 0 {
            return Err(VfioError::GetGroupStatus(get_error()));
        }

        // Reject groups the kernel does not report as viable.
        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
            return Err(VfioError::GroupViable);
        }

        let container_raw_descriptor = container.as_raw_descriptor();
        // SAFETY:
        // Safe as we are the owner of group_file and container_raw_descriptor which are valid
        // value, and we verify the ret value
        ret = unsafe {
            ioctl_with_ref(
                &group_file,
                VFIO_GROUP_SET_CONTAINER,
                &container_raw_descriptor,
            )
        };
        if ret < 0 {
            return Err(VfioError::GroupSetContainer(get_error()));
        }

        Ok(VfioGroup {
            group: group_file,
            device_num: 0,
        })
    }

    // Resolves the IOMMU group id for a device's sysfs path by reading its
    // `iommu_group` symlink and parsing the link target's final path
    // component as a number.
    fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path
            .read_link()
            .map_err(|e| VfioError::ReadLink(e, uuid_path))?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        Ok(group_id)
    }

    // Adds this group to, or deletes it from, the KVM VFIO pseudo device via
    // KVM_SET_DEVICE_ATTR with the KVM_DEV_VFIO_GROUP attribute group.
    fn kvm_device_set_group(
        &self,
        kvm_vfio_file: &SafeDescriptor,
        ops: KvmVfioGroupOps,
    ) -> Result<()> {
        let group_descriptor = self.as_raw_descriptor();
        // The kernel reads the group fd through the pointer passed in `addr`.
        let group_descriptor_ptr = &group_descriptor as *const i32;
        let vfio_dev_attr = match ops {
            KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
                addr: group_descriptor_ptr as u64,
            },
            KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
                addr: group_descriptor_ptr as u64,
            },
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_descriptor and vfio_dev_attr which are valid value,
        // and we verify the return value.
        if 0 != unsafe {
            ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr)
        } {
            return Err(VfioError::KvmSetDeviceAttr(get_error()));
        }

        Ok(())
    }

    // Obtains a device fd from this group by name (VFIO_GROUP_GET_DEVICE_FD).
    fn get_device(&self, name: &str) -> Result<File> {
        let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
        let path_ptr = path.as_ptr();

        // SAFETY:
        // Safe as we are the owner of self and path_ptr which are valid value.
        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD, path_ptr) };
        if ret < 0 {
            return Err(VfioError::GroupGetDeviceFD(get_error()));
        }

        // SAFETY:
        // Safe as ret is valid descriptor
        Ok(unsafe { File::from_raw_descriptor(ret) })
    }

    // Increments the count of devices opened from this group.
    fn add_device_num(&mut self) {
        self.device_num += 1;
    }

    // Decrements the count of devices opened from this group.
    fn reduce_device_num(&mut self) {
        self.device_num -= 1;
    }

    // Number of devices currently opened from this group.
    fn device_num(&self) -> u32 {
        self.device_num
    }
}
846
impl AsRawDescriptor for VfioGroup {
    // Exposes the raw group fd so it can be used directly in ioctls.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}
852
/// A helper struct for managing VFIO containers.
///
/// Containers are created lazily and cached so that devices sharing an IOVA
/// space also share a container.
#[derive(Default)]
pub struct VfioContainerManager {
    /// One VFIO container shared by all VFIO devices that don't attach to any IOMMU device.
    no_iommu_container: Option<Arc<Mutex<VfioContainer>>>,

    /// For IOMMU enabled devices, all VFIO groups that share the same IOVA space are managed by
    /// one VFIO container.
    iommu_containers: Vec<Arc<Mutex<VfioContainer>>>,

    /// One VFIO container shared by all VFIO devices that attach to the CoIOMMU device.
    coiommu_container: Option<Arc<Mutex<VfioContainer>>>,

    /// One VFIO container shared by all VFIO devices that attach to pKVM.
    pkvm_iommu_container: Option<Arc<Mutex<VfioContainer>>>,
}
869
870 impl VfioContainerManager {
new() -> Self871 pub fn new() -> Self {
872 Self::default()
873 }
874
875 /// The single place to create a VFIO container for a PCI endpoint.
876 ///
877 /// The policy to determine whether an individual or a shared VFIO container
878 /// will be created for this device is governed by the physical PCI topology,
879 /// and the argument iommu_type.
880 ///
881 /// # Arguments
882 ///
883 /// * `sysfspath` - the path to the PCI device, e.g. /sys/bus/pci/devices/0000:02:00.0
884 /// * `iommu_type` - which type of IOMMU is enabled on this device
get_container<P: AsRef<Path>>( &mut self, iommu_type: IommuDevType, sysfspath: Option<P>, ) -> Result<Arc<Mutex<VfioContainer>>>885 pub fn get_container<P: AsRef<Path>>(
886 &mut self,
887 iommu_type: IommuDevType,
888 sysfspath: Option<P>,
889 ) -> Result<Arc<Mutex<VfioContainer>>> {
890 match iommu_type {
891 IommuDevType::NoIommu => {
892 // One VFIO container is used for all IOMMU disabled groups.
893 if let Some(container) = &self.no_iommu_container {
894 Ok(container.clone())
895 } else {
896 let container = Arc::new(Mutex::new(VfioContainer::new()?));
897 self.no_iommu_container = Some(container.clone());
898 Ok(container)
899 }
900 }
901 IommuDevType::VirtioIommu => {
902 let path = sysfspath.ok_or(VfioError::InvalidPath)?;
903 let group_id = VfioGroup::get_group_id(path)?;
904
905 // One VFIO container is used for all devices that belong to one VFIO group.
906 // NOTE: vfio_wrapper relies on each container containing exactly one group.
907 if let Some(container) = self
908 .iommu_containers
909 .iter()
910 .find(|container| container.lock().is_group_set(group_id))
911 {
912 Ok(container.clone())
913 } else {
914 let container = Arc::new(Mutex::new(VfioContainer::new()?));
915 self.iommu_containers.push(container.clone());
916 Ok(container)
917 }
918 }
919 IommuDevType::CoIommu => {
920 // One VFIO container is used for devices attached to CoIommu
921 if let Some(container) = &self.coiommu_container {
922 Ok(container.clone())
923 } else {
924 let container = Arc::new(Mutex::new(VfioContainer::new()?));
925 self.coiommu_container = Some(container.clone());
926 Ok(container)
927 }
928 }
929 IommuDevType::PkvmPviommu => {
930 // One VFIO container is used for devices attached to pKVM
931 if let Some(container) = &self.pkvm_iommu_container {
932 Ok(container.clone())
933 } else {
934 let container = Arc::new(Mutex::new(VfioContainer::new()?));
935 self.pkvm_iommu_container = Some(container.clone());
936 Ok(container)
937 }
938 }
939 }
940 }
941 }
942
/// Vfio Irq type used to enable/disable/mask/unmask vfio irq
pub enum VfioIrqType {
    /// Legacy INTx line interrupt.
    Intx,
    /// Message Signaled Interrupts.
    Msi,
    /// Extended Message Signaled Interrupts.
    Msix,
}
949
/// Vfio Irq information used to assign and enable/disable/mask/unmask vfio irq
pub struct VfioIrq {
    // IRQ info flags. NOTE(review): presumably as reported by the kernel's
    // irq-info ioctl — confirm against the code that populates this struct.
    pub flags: u32,
    // Index of this IRQ within the device.
    pub index: u32,
}
955
/// Address on VFIO memory region: identifies a location inside one of a
/// device's regions.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    /// region number.
    pub index: usize,
    /// offset in the region.
    pub addr: u64,
}
964
/// A single memory region of a VFIO device.
#[derive(Debug)]
pub struct VfioRegion {
    // flags for this region: read/write/mmap
    flags: u32,
    // Size of the region in bytes.
    size: u64,
    // region offset used to read/write with vfio device descriptor
    offset: u64,
    // vectors for mmap offset and size
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // type and subtype for cap type
    cap_info: Option<(u32, u32)>,
}
977
/// Vfio device for exposing regions which could be read/write to kernel vfio device.
pub struct VfioDevice {
    // open descriptor for the VFIO device itself
    dev: File,
    // device name: the last component of its sysfs path
    name: String,
    // container holding the device's VFIO group
    container: Arc<Mutex<VfioContainer>>,
    // whether this is a PCI or platform device
    dev_type: VfioDeviceType,
    // raw descriptor of the device's VFIO group
    group_descriptor: RawDescriptor,
    // IOMMU group number for this device
    group_id: u32,
    // vec for vfio device's regions
    regions: Vec<VfioRegion>,
    // number of interrupts reported by VFIO_DEVICE_GET_INFO
    num_irqs: u32,

    // allocator over the IOVA ranges supported by the container's IOMMU
    iova_alloc: Arc<Mutex<AddressAllocator>>,
    // optional device-tree node label (used for platform devices)
    dt_symbol: Option<String>,
    // pvIOMMU instance plus the virtual SIDs assigned to this device (pKVM only)
    pviommu: Option<(Arc<Mutex<KvmVfioPviommu>>, Vec<u32>)>,
}
994
995 impl VfioDevice {
    /// Create a new vfio device, then guest read/write on this device could be
    /// transfered into kernel vfio.
    /// sysfspath specify the vfio device path in sys file system.
    ///
    /// # Arguments
    /// * `sysfspath` - path of the device under /sys; its last component is the device name.
    /// * `vm` - the VM the device is attached to (used for KVM VFIO / pvIOMMU setup).
    /// * `container` - VFIO container that will own the device's group.
    /// * `iommu_dev` - which IOMMU flavor the device is attached through.
    /// * `dt_symbol` - optional device-tree node label, stored for later lookup.
    ///
    /// NOTE(review): unlike `Self::new`, failures after `get_group_with_vm` (e.g. in
    /// `get_device` / `get_device_info` / `get_regions`) do not call
    /// `container.remove_group`, so a failed probe may leave the group registered —
    /// confirm whether the same cleanup as in `new` is needed here.
    pub fn new_passthrough<P: AsRef<Path>>(
        sysfspath: &P,
        vm: &impl Vm,
        container: Arc<Mutex<VfioContainer>>,
        iommu_dev: IommuDevType,
        dt_symbol: Option<String>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;

        let group = container
            .lock()
            .get_group_with_vm(group_id, vm, iommu_dev)?;
        // The device name is the final path component, e.g. "0000:02:00.0".
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.lock().get_device(&name)?;
        let (dev_info, dev_type) = Self::get_device_info(&dev)?;
        let regions = Self::get_regions(&dev, dev_info.num_regions)?;
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        // Build an allocator over the IOMMU's usable IOVA ranges for later DMA mappings.
        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        let pviommu = if matches!(iommu_dev, IommuDevType::PkvmPviommu) {
            // We currently have a 1-to-1 mapping between pvIOMMUs and VFIO devices.
            let pviommu = KvmVfioPviommu::new(vm)?;

            // Draw unique random virtual SIDs and attach one to each device SID slot.
            let vsids_len = KvmVfioPviommu::get_sid_count(vm, &dev)?.try_into().unwrap();
            let max_vsid = u32::MAX.try_into().unwrap();
            let random_vsids = sample(&mut thread_rng(), max_vsid, vsids_len).into_iter();
            let vsids = Vec::from_iter(random_vsids.map(|v| u32::try_from(v).unwrap()));
            for (i, vsid) in vsids.iter().enumerate() {
                pviommu.attach(&dev, i.try_into().unwrap(), *vsid)?;
            }

            Some((Arc::new(Mutex::new(pviommu)), vsids))
        } else {
            None
        };

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol,
            pviommu,
        })
    }
1058
    /// Create a new vfio device from an already-initialized `container`, without
    /// VM-specific (KVM / pvIOMMU) setup.
    ///
    /// `sysfspath` is the device path under /sys; its last component is the device name.
    /// On failure after the group has been fetched, the group is removed from the
    /// container again so a failed probe does not leak group state.
    ///
    /// NOTE(review): the `vfio_iommu_iova_get_iova_ranges` error path below uses `?`
    /// and does NOT call `remove_group`, unlike the three earlier error paths —
    /// confirm whether that cleanup is intentionally skipped there.
    pub fn new<P: AsRef<Path>>(
        sysfspath: &P,
        container: Arc<Mutex<VfioContainer>>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;
        let group = container.lock().get_group(group_id)?;
        // The device name is the final path component, e.g. "0000:02:00.0".
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);

        let dev = match group.lock().get_device(&name) {
            Ok(dev) => dev,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let (dev_info, dev_type) = match Self::get_device_info(&dev) {
            Ok(dev_info) => dev_info,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let regions = match Self::get_regions(&dev, dev_info.num_regions) {
            Ok(regions) => regions,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        // Build an allocator over the IOMMU's usable IOVA ranges for later DMA mappings.
        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol: None,
            pviommu: None,
        })
    }
1114
1115 /// Returns the file for this device.
dev_file(&self) -> &File1116 pub fn dev_file(&self) -> &File {
1117 &self.dev
1118 }
1119
1120 /// Returns PCI device name, formatted as BUS:DEVICE.FUNCTION string.
device_name(&self) -> &String1121 pub fn device_name(&self) -> &String {
1122 &self.name
1123 }
1124
1125 /// Returns the type of this VFIO device.
device_type(&self) -> VfioDeviceType1126 pub fn device_type(&self) -> VfioDeviceType {
1127 self.dev_type
1128 }
1129
1130 /// Returns the DT symbol (node label) of this VFIO device.
dt_symbol(&self) -> Option<&str>1131 pub fn dt_symbol(&self) -> Option<&str> {
1132 self.dt_symbol.as_deref()
1133 }
1134
1135 /// Returns the type and indentifier (if applicable) of the IOMMU used by this VFIO device and
1136 /// its master IDs.
iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])>1137 pub fn iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])> {
1138 // We currently only report IommuDevType::PkvmPviommu.
1139 if let Some((ref pviommu, ref ids)) = self.pviommu {
1140 Some((
1141 IommuDevType::PkvmPviommu,
1142 Some(pviommu.lock().id()),
1143 ids.as_ref(),
1144 ))
1145 } else {
1146 None
1147 }
1148 }
1149
1150 /// enter the device's low power state
pm_low_power_enter(&self) -> Result<()>1151 pub fn pm_low_power_enter(&self) -> Result<()> {
1152 let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
1153 device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
1154 device_feature[0].flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY;
1155 // SAFETY:
1156 // Safe as we are the owner of self and power_management which are valid value
1157 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
1158 if ret < 0 {
1159 Err(VfioError::VfioPmLowPowerEnter(get_error()))
1160 } else {
1161 Ok(())
1162 }
1163 }
1164
    /// enter the device's low power state with wakeup notification
    ///
    /// `wakeup_evt` is an eventfd the kernel signals when the device wants to wake;
    /// its raw descriptor is embedded in the feature payload.
    pub fn pm_low_power_enter_with_wakeup(&self, wakeup_evt: Event) -> Result<()> {
        // Payload appended after the vfio_device_feature header: eventfd + reserved pad.
        let payload = vfio_device_low_power_entry_with_wakeup {
            wakeup_eventfd: wakeup_evt.as_raw_descriptor(),
            reserved: 0,
        };
        let payload_size = mem::size_of::<vfio_device_low_power_entry_with_wakeup>();
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(payload_size);
        // argsz covers the header plus the trailing payload bytes.
        device_feature[0].argsz = (mem::size_of::<vfio_device_feature>() + payload_size) as u32;
        device_feature[0].flags =
            VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
        // SAFETY:
        // Safe as we know vfio_device_low_power_entry_with_wakeup has two 32-bit int fields
        unsafe {
            device_feature[0]
                .data
                .as_mut_slice(payload_size)
                .copy_from_slice(
                    // Reinterpret the 8-byte payload struct as raw bytes for the ioctl buffer.
                    mem::transmute::<vfio_device_low_power_entry_with_wakeup, [u8; 8]>(payload)
                        .as_slice(),
                );
        }
        // SAFETY:
        // Safe as we are the owner of self and device_feature which are valid values
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
        if ret < 0 {
            Err(VfioError::VfioPmLowPowerEnter(get_error()))
        } else {
            Ok(())
        }
    }
1196
1197 /// exit the device's low power state
pm_low_power_exit(&self) -> Result<()>1198 pub fn pm_low_power_exit(&self) -> Result<()> {
1199 let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
1200 device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
1201 device_feature[0].flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT;
1202 // SAFETY:
1203 // Safe as we are the owner of self and power_management which are valid value
1204 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
1205 if ret < 0 {
1206 Err(VfioError::VfioPmLowPowerExit(get_error()))
1207 } else {
1208 Ok(())
1209 }
1210 }
1211
    /// call _DSM from the device's ACPI table
    ///
    /// `args` is the raw _DSM argument buffer passed through to the kernel; the kernel
    /// writes the result back into the same trailing byte array, which is returned.
    pub fn acpi_dsm(&self, args: &[u8]) -> Result<Vec<u8>> {
        let count = args.len();
        // Header struct followed by a `count`-byte flexible array member.
        let mut dsm = vec_with_array_field::<vfio_acpi_dsm, u8>(count);
        dsm[0].argsz = (mem::size_of::<vfio_acpi_dsm>() + mem::size_of_val(args)) as u32;
        dsm[0].padding = 0;
        // SAFETY:
        // Safe as we allocated enough space to hold args
        unsafe {
            dsm[0].args.as_mut_slice(count).clone_from_slice(args);
        }
        // SAFETY:
        // Safe as we are the owner of self and dsm which are valid value
        let ret = unsafe { ioctl_with_mut_ref(&self.dev, VFIO_DEVICE_ACPI_DSM, &mut dsm[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiDsm(get_error()))
        } else {
            // SAFETY:
            // Safe as we allocated enough space to hold args
            let res = unsafe { dsm[0].args.as_slice(count) };
            Ok(res.to_vec())
        }
    }
1235
    /// Enable vfio device's ACPI notifications and associate EventFD with device.
    ///
    /// `acpi_notification_eventfd` is signaled by the kernel on ACPI notifications;
    /// `index` selects which device IRQ index carries them.
    pub fn acpi_notification_evt_enable(
        &self,
        acpi_notification_eventfd: &Event,
        index: u32,
    ) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        // A single eventfd is registered.
        let count = 1;

        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = count as u32;

        // SAFETY:
        // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
        let data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        // The kernel expects the raw eventfd descriptor in host byte order.
        data.copy_from_slice(&acpi_notification_eventfd.as_raw_descriptor().to_ne_bytes()[..]);

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationEnable(get_error()))
        } else {
            Ok(())
        }
    }
1266
1267 /// Disable vfio device's ACPI notification and disconnect EventFd with device.
acpi_notification_disable(&self, index: u32) -> Result<()>1268 pub fn acpi_notification_disable(&self, index: u32) -> Result<()> {
1269 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1270 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1271 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1272 irq_set[0].index = index;
1273 irq_set[0].start = 0;
1274 irq_set[0].count = 0;
1275
1276 // SAFETY:
1277 // Safe as we are the owner of self and irq_set which are valid value
1278 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1279 if ret < 0 {
1280 Err(VfioError::VfioAcpiNotificationDisable(get_error()))
1281 } else {
1282 Ok(())
1283 }
1284 }
1285
    /// Test vfio device's ACPI notification by simulating hardware triggering.
    /// When the signaling mechanism is set, the VFIO_IRQ_SET_DATA_BOOL can be used with
    /// VFIO_IRQ_SET_ACTION_TRIGGER to perform kernel level interrupt loopback testing.
    ///
    /// `val` is the boolean trigger value passed through as a u32.
    pub fn acpi_notification_test(&self, index: u32, val: u32) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_BOOL | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY:
        // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
        let data = unsafe { irq_set[0].data.as_mut_slice(u32_size) };
        data.copy_from_slice(&val.to_ne_bytes()[..]);

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationTest(get_error()))
        } else {
            Ok(())
        }
    }
1312
    /// Enable vfio device's irq and associate Irqfd Event with device.
    /// When MSIx is enabled, multi vectors will be supported, and vectors starting from subindex to
    /// subindex + descriptors length will be assigned with irqfd in the descriptors array.
    /// when index = VFIO_PCI_REQ_IRQ_INDEX, kernel vfio will trigger this event when physical
    /// device is removed.
    /// If descriptor is None, -1 is assigned to the irq. A value of -1 is used to either de-assign
    /// interrupts if already assigned or skip un-assigned interrupts.
    pub fn irq_enable(
        &self,
        descriptors: &[Option<&Event>],
        index: u32,
        subindex: u32,
    ) -> Result<()> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        // vfio_irq_set header followed by one u32 (eventfd or -1) per vector.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = subindex;
        irq_set[0].count = count as u32;

        // SAFETY:
        // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
        // is u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
        // together as u32. It is safe as enough space is reserved through
        // vec_with_array_field(u32)<count>.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        // Walk the byte slice 4 bytes at a time, writing each vector's eventfd (or -1).
        for descriptor in descriptors.iter().take(count) {
            let (left, right) = data.split_at_mut(u32_size);
            match descriptor {
                Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
                None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
            }
            data = right;
        }

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }
1359
1360 /// When intx is enabled, irqfd is used to trigger a level interrupt into guest, resample irqfd
1361 /// is used to get guest EOI notification.
1362 /// When host hw generates interrupt, vfio irq handler in host kernel receive and handle it,
1363 /// this handler disable hw irq first, then trigger irqfd to inject interrupt into guest. When
1364 /// resample irqfd is triggered by guest EOI, vfio kernel could enable hw irq, so hw could
1365 /// generate another interrupts.
1366 /// This function enable resample irqfd and let vfio kernel could get EOI notification.
1367 ///
1368 /// descriptor: should be resample IrqFd.
resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()>1369 pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
1370 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
1371 irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
1372 irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
1373 irq_set[0].index = index;
1374 irq_set[0].start = 0;
1375 irq_set[0].count = 1;
1376
1377 {
1378 // SAFETY:
1379 // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data is
1380 // u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
1381 // together as u32. It is safe as enough space is reserved through
1382 // vec_with_array_field(u32)<1>.
1383 let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
1384 descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
1385 }
1386
1387 // SAFETY:
1388 // Safe as we are the owner of self and irq_set which are valid value
1389 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1390 if ret < 0 {
1391 Err(VfioError::VfioIrqEnable(get_error()))
1392 } else {
1393 Ok(())
1394 }
1395 }
1396
1397 /// disable vfio device's irq and disconnect Irqfd Event with device
irq_disable(&self, index: u32) -> Result<()>1398 pub fn irq_disable(&self, index: u32) -> Result<()> {
1399 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1400 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1401 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1402 irq_set[0].index = index;
1403 irq_set[0].start = 0;
1404 irq_set[0].count = 0;
1405
1406 // SAFETY:
1407 // Safe as we are the owner of self and irq_set which are valid value
1408 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1409 if ret < 0 {
1410 Err(VfioError::VfioIrqDisable(get_error()))
1411 } else {
1412 Ok(())
1413 }
1414 }
1415
1416 /// Unmask vfio device irq
irq_unmask(&self, index: u32) -> Result<()>1417 pub fn irq_unmask(&self, index: u32) -> Result<()> {
1418 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1419 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1420 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
1421 irq_set[0].index = index;
1422 irq_set[0].start = 0;
1423 irq_set[0].count = 1;
1424
1425 // SAFETY:
1426 // Safe as we are the owner of self and irq_set which are valid value
1427 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1428 if ret < 0 {
1429 Err(VfioError::VfioIrqUnmask(get_error()))
1430 } else {
1431 Ok(())
1432 }
1433 }
1434
1435 /// Mask vfio device irq
irq_mask(&self, index: u32) -> Result<()>1436 pub fn irq_mask(&self, index: u32) -> Result<()> {
1437 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1438 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1439 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
1440 irq_set[0].index = index;
1441 irq_set[0].start = 0;
1442 irq_set[0].count = 1;
1443
1444 // SAFETY:
1445 // Safe as we are the owner of self and irq_set which are valid value
1446 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1447 if ret < 0 {
1448 Err(VfioError::VfioIrqMask(get_error()))
1449 } else {
1450 Ok(())
1451 }
1452 }
1453
    /// Get and validate VFIO device information.
    ///
    /// Returns the raw `vfio_device_info` plus the classified device type
    /// (PCI or platform). PCI devices must expose at least the config region
    /// and the MSI-X IRQ index to be usable.
    fn get_device_info(device_file: &File) -> Result<(vfio_device_info, VfioDeviceType)> {
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as we are the owner of device_file and dev_info which are valid value,
        // and we verify the return value.
        let ret = unsafe { ioctl_with_mut_ref(device_file, VFIO_DEVICE_GET_INFO, &mut dev_info) };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        let dev_type = if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
            // PCI devices must have at least the config-space region and MSI-X IRQ index.
            if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
                || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
            {
                // NOTE(review): the ioctl above succeeded, so get_error() here reports a
                // stale/unrelated errno for this validation failure — confirm whether a
                // dedicated error variant would be clearer.
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            VfioDeviceType::Pci
        } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
            VfioDeviceType::Platform
        } else {
            return Err(VfioError::UnknownDeviceType(dev_info.flags));
        };

        Ok((dev_info, dev_type))
    }
1488
    /// Query interrupt information
    /// return: Vector of interrupts information, each of which contains flags and index
    ///
    /// Every IRQ index of the device is queried via VFIO_DEVICE_GET_IRQ_INFO; any index
    /// whose reported count is not exactly 1 causes the whole query to fail.
    pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
        let mut irqs: Vec<VfioIrq> = Vec::new();

        for i in 0..self.num_irqs {
            let argsz = mem::size_of::<vfio_irq_info>() as u32;
            let mut irq_info = vfio_irq_info {
                argsz,
                flags: 0,
                index: i,
                count: 0,
            };
            // SAFETY:
            // Safe as we are the owner of dev and irq_info which are valid value,
            // and we verify the return value.
            let ret = unsafe {
                ioctl_with_mut_ref(self.device_file(), VFIO_DEVICE_GET_IRQ_INFO, &mut irq_info)
            };
            // NOTE(review): when ret >= 0 but count != 1, get_error() reports a
            // stale/unrelated errno — confirm a dedicated error would be clearer.
            if ret < 0 || irq_info.count != 1 {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            let irq = VfioIrq {
                flags: irq_info.flags,
                index: irq_info.index,
            };
            irqs.push(irq);
        }
        Ok(irqs)
    }
1520
    /// Enumerate the device's regions via VFIO_DEVICE_GET_REGION_INFO, including any
    /// capability chain data (sparse mmap areas, cap type, MSI-X mappable).
    ///
    /// NOTE(review): the `continue` branches below skip pushing a `VfioRegion`, which
    /// shifts the vec index of every later region away from its kernel region index —
    /// confirm whether callers that index `regions` by region number rely on alignment.
    #[allow(clippy::cast_ptr_alignment)]
    fn get_regions(dev: &File, num_regions: u32) -> Result<Vec<VfioRegion>> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        for i in 0..num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            let ret =
                // SAFETY:
                // Safe as we are the owner of dev and reg_info which are valid value,
                // and we verify the return value.
                unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO, &mut reg_info) };
            if ret < 0 {
                // Region not available; skip it (see NOTE(review) about index shift).
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            // argsz grown by the kernel means capability data follows the header:
            // re-query with a buffer large enough for the whole cap chain.
            if reg_info.argsz > argsz {
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // SAFETY:
                // Safe as we are the owner of dev and region_info which are valid value,
                // and we verify the return value.
                let ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO,
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    // Kernel claims no caps despite the larger argsz; skip this region.
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                // region_with_cap[0].cap_info may contain many structures, like
                // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct.
                // Both of them begin with vfio_info_cap_header, so we will get individual cap from
                // vfio_into_cap_header.
                // Go through all the cap structs.
                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                let mut offset = region_with_cap[0].region_info.cap_offset;
                while offset != 0 {
                    // NOTE(review): these u32 additions (and the multiply below) come from
                    // kernel-provided values and could in principle wrap — confirm whether
                    // checked arithmetic is warranted here.
                    if offset + cap_header_sz > region_info_sz {
                        break;
                    }
                    // SAFETY:
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    // SAFETY:
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_header = unsafe { &*(cap_ptr as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_sparse_mmap here
                        let sparse_mmap =
                            // SAFETY:
                            // Safe, this vfio_region_info_cap_sparse_mmap is in this function
                            // allocated region_with_cap vec.
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_sparse_mmap) };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        let areas =
                            // SAFETY:
                            // Safe, these vfio_region_sparse_mmap_area are in this function allocated
                            // region_with_cap vec.
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_type here
                        let cap_type_info =
                            // SAFETY:
                            // Safe, this vfio_region_info_cap_type is in this function allocated
                            // region_with_cap vec
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                        // MSI-X mappable: whole region can be mmapped.
                        mmaps.push(vfio_region_sparse_mmap_area {
                            offset: 0,
                            size: region_with_cap[0].region_info.size,
                        });
                    }

                    // Follow the chain to the next capability (0 terminates).
                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                // No caps: an mmap-able region is mappable over its full size.
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
            };
            regions.push(region);
        }

        Ok(regions)
    }
1661
1662 /// get a region's flag
1663 /// the return's value may conatin:
1664 /// VFIO_REGION_INFO_FLAG_READ: region supports read
1665 /// VFIO_REGION_INFO_FLAG_WRITE: region supports write
1666 /// VFIO_REGION_INFO_FLAG_MMAP: region supports mmap
1667 /// VFIO_REGION_INFO_FLAG_CAPS: region's info supports caps
get_region_flags(&self, index: usize) -> u321668 pub fn get_region_flags(&self, index: usize) -> u32 {
1669 match self.regions.get(index) {
1670 Some(v) => v.flags,
1671 None => {
1672 warn!("get_region_flags() with invalid index: {}", index);
1673 0
1674 }
1675 }
1676 }
1677
1678 /// get a region's offset
1679 /// return: Region offset from the start of vfio device descriptor
get_region_offset(&self, index: usize) -> u641680 pub fn get_region_offset(&self, index: usize) -> u64 {
1681 match self.regions.get(index) {
1682 Some(v) => v.offset,
1683 None => {
1684 warn!("get_region_offset with invalid index: {}", index);
1685 0
1686 }
1687 }
1688 }
1689
1690 /// get a region's size
1691 /// return: Region size from the start of vfio device descriptor
get_region_size(&self, index: usize) -> u641692 pub fn get_region_size(&self, index: usize) -> u64 {
1693 match self.regions.get(index) {
1694 Some(v) => v.size,
1695 None => {
1696 warn!("get_region_size with invalid index: {}", index);
1697 0
1698 }
1699 }
1700 }
1701
1702 /// get a number of regions
1703 /// return: Number of regions of vfio device descriptor
get_region_count(&self) -> usize1704 pub fn get_region_count(&self) -> usize {
1705 self.regions.len()
1706 }
1707
1708 /// get a region's mmap info vector
get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area>1709 pub fn get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area> {
1710 match self.regions.get(index) {
1711 Some(v) => v.mmaps.clone(),
1712 None => {
1713 warn!("get_region_mmap with invalid index: {}", index);
1714 Vec::new()
1715 }
1716 }
1717 }
1718
1719 /// find the specified cap type in device regions
1720 /// Input:
1721 /// type_: cap type
1722 /// sub_type: cap sub_type
1723 /// Output:
1724 /// None: device doesn't have the specified cap type
1725 /// Some((bar_index, region_size)): device has the specified cap type, return region's
1726 /// index and size
get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)>1727 pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
1728 for (index, region) in self.regions.iter().enumerate() {
1729 if let Some(cap_info) = ®ion.cap_info {
1730 if cap_info.0 == type_ && cap_info.1 == sub_type {
1731 return Some((index as u32, region.size));
1732 }
1733 }
1734 }
1735
1736 None
1737 }
1738
1739 /// Returns file offset corresponding to the given `VfioRegionAddr`.
1740 /// The offset can be used when reading/writing the VFIO device's FD directly.
get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64>1741 pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
1742 let region = self
1743 .regions
1744 .get(addr.index)
1745 .ok_or(VfioError::InvalidIndex(addr.index))?;
1746 Ok(region.offset + addr.addr)
1747 }
1748
1749 /// Read region's data from VFIO device into buf
1750 /// index: region num
1751 /// buf: data destination and buf length is read size
1752 /// addr: offset in the region
region_read(&self, index: usize, buf: &mut [u8], addr: u64)1753 pub fn region_read(&self, index: usize, buf: &mut [u8], addr: u64) {
1754 let stub: &VfioRegion = self
1755 .regions
1756 .get(index)
1757 .unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {}", index));
1758
1759 let size = buf.len() as u64;
1760 if size > stub.size || addr + size > stub.size {
1761 panic!(
1762 "tried to read VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
1763 index, addr, size
1764 );
1765 }
1766
1767 self.dev
1768 .read_exact_at(buf, stub.offset + addr)
1769 .unwrap_or_else(|e| {
1770 panic!(
1771 "failed to read region: index={}, addr=0x{:x}, error={}",
1772 index, addr, e
1773 )
1774 });
1775 }
1776
    /// Reads a value from the specified `VfioRegionAddr.addr` + `offset`.
    ///
    /// `T: FromBytes` guarantees every bit pattern is a valid `T`, which is what makes
    /// the zero-init-then-overwrite pattern below sound.
    pub fn region_read_from_addr<T: FromBytes>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
        let mut val = mem::MaybeUninit::zeroed();
        let buf =
            // SAFETY:
            // Safe because we have zero-initialized `size_of::<T>()` bytes.
            unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
        // Fill the buffer from the region; panics (inside region_read) on bad index/range.
        self.region_read(addr.index, buf, addr.addr + offset);
        // SAFETY:
        // Safe because any bit pattern is valid for a type that implements FromBytes.
        unsafe { val.assume_init() }
    }
1789
1790 /// write the data from buf into a vfio device region
1791 /// index: region num
1792 /// buf: data src and buf length is write size
1793 /// addr: offset in the region
region_write(&self, index: usize, buf: &[u8], addr: u64)1794 pub fn region_write(&self, index: usize, buf: &[u8], addr: u64) {
1795 let stub: &VfioRegion = self
1796 .regions
1797 .get(index)
1798 .unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {}", index));
1799
1800 let size = buf.len() as u64;
1801 if size > stub.size
1802 || addr + size > stub.size
1803 || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
1804 {
1805 panic!(
1806 "tried to write VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
1807 index, addr, size
1808 );
1809 }
1810
1811 self.dev
1812 .write_all_at(buf, stub.offset + addr)
1813 .unwrap_or_else(|e| {
1814 panic!(
1815 "failed to write region: index={}, addr=0x{:x}, error={}",
1816 index, addr, e
1817 )
1818 });
1819 }
1820
1821 /// Writes data into the specified `VfioRegionAddr.addr` + `offset`.
region_write_to_addr(&self, data: &[u8], addr: &VfioRegionAddr, offset: u64)1822 pub fn region_write_to_addr(&self, data: &[u8], addr: &VfioRegionAddr, offset: u64) {
1823 self.region_write(addr.index, data, addr.addr + offset);
1824 }
1825
1826 /// get vfio device's descriptors which are passed into minijail process
keep_rds(&self) -> Vec<RawDescriptor>1827 pub fn keep_rds(&self) -> Vec<RawDescriptor> {
1828 vec![
1829 self.dev.as_raw_descriptor(),
1830 self.group_descriptor,
1831 self.container.lock().as_raw_descriptor(),
1832 ]
1833 }
1834
1835 /// Add (iova, user_addr) map into vfio container iommu table
1836 /// # Safety
1837 ///
1838 /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
vfio_dma_map( &self, iova: u64, size: u64, user_addr: u64, write_en: bool, ) -> Result<()>1839 pub unsafe fn vfio_dma_map(
1840 &self,
1841 iova: u64,
1842 size: u64,
1843 user_addr: u64,
1844 write_en: bool,
1845 ) -> Result<()> {
1846 self.container
1847 .lock()
1848 .vfio_dma_map(iova, size, user_addr, write_en)
1849 }
1850
1851 /// Remove (iova, user_addr) map from vfio container iommu table
vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()>1852 pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
1853 self.container.lock().vfio_dma_unmap(iova, size)
1854 }
1855
vfio_get_iommu_page_size_mask(&self) -> Result<u64>1856 pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
1857 self.container.lock().vfio_get_iommu_page_size_mask()
1858 }
1859
alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64>1860 pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
1861 self.iova_alloc
1862 .lock()
1863 .allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
1864 .map_err(VfioError::Resources)
1865 }
1866
get_iova(&self, alloc: &Alloc) -> Option<AddressRange>1867 pub fn get_iova(&self, alloc: &Alloc) -> Option<AddressRange> {
1868 self.iova_alloc.lock().get(alloc).map(|res| res.0)
1869 }
1870
release_iova(&self, alloc: Alloc) -> Result<AddressRange>1871 pub fn release_iova(&self, alloc: Alloc) -> Result<AddressRange> {
1872 self.iova_alloc
1873 .lock()
1874 .release(alloc)
1875 .map_err(VfioError::Resources)
1876 }
1877
get_max_addr(&self) -> u641878 pub fn get_max_addr(&self) -> u64 {
1879 self.iova_alloc.lock().get_max_addr()
1880 }
1881
1882 /// Gets the vfio device backing `File`.
device_file(&self) -> &File1883 pub fn device_file(&self) -> &File {
1884 &self.dev
1885 }
1886
1887 /// close vfio device
close(&self)1888 pub fn close(&self) {
1889 self.container.lock().remove_group(self.group_id, true);
1890 }
1891 }
1892
/// Accessor for the PCI configuration space of a VFIO device.
pub struct VfioPciConfig {
    // Device whose config region (VFIO_PCI_CONFIG_REGION_INDEX) is read and
    // written through this wrapper.
    device: Arc<VfioDevice>,
}
1896
1897 impl VfioPciConfig {
new(device: Arc<VfioDevice>) -> Self1898 pub fn new(device: Arc<VfioDevice>) -> Self {
1899 VfioPciConfig { device }
1900 }
1901
read_config<T: IntoBytes + FromBytes>(&self, offset: u32) -> T1902 pub fn read_config<T: IntoBytes + FromBytes>(&self, offset: u32) -> T {
1903 let mut config = T::new_zeroed();
1904 self.device.region_read(
1905 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1906 config.as_mut_bytes(),
1907 offset.into(),
1908 );
1909 config
1910 }
1911
write_config<T: Immutable + IntoBytes>(&self, config: T, offset: u32)1912 pub fn write_config<T: Immutable + IntoBytes>(&self, config: T, offset: u32) {
1913 self.device.region_write(
1914 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1915 config.as_bytes(),
1916 offset.into(),
1917 );
1918 }
1919
1920 /// Set the VFIO device this config refers to as the bus master.
set_bus_master(&self)1921 pub fn set_bus_master(&self) {
1922 /// Constant definitions from `linux/pci_regs.h`.
1923 const PCI_COMMAND: u32 = 0x4;
1924 /// Enable bus mastering
1925 const PCI_COMMAND_MASTER: u16 = 0x4;
1926
1927 let mut cmd: u16 = self.read_config(PCI_COMMAND);
1928
1929 if cmd & PCI_COMMAND_MASTER != 0 {
1930 return;
1931 }
1932
1933 cmd |= PCI_COMMAND_MASTER;
1934
1935 self.write_config(cmd, PCI_COMMAND);
1936 }
1937 }
1938
1939 impl AsRawDescriptor for VfioDevice {
as_raw_descriptor(&self) -> RawDescriptor1940 fn as_raw_descriptor(&self) -> RawDescriptor {
1941 self.dev.as_raw_descriptor()
1942 }
1943 }
1944