1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::cell::RefCell;
6 use std::collections::HashMap;
7 use std::ffi::CString;
8 use std::fs::File;
9 use std::fs::OpenOptions;
10 use std::io;
11 use std::mem;
12 use std::os::raw::c_ulong;
13 use std::os::unix::prelude::FileExt;
14 use std::path::Path;
15 use std::path::PathBuf;
16 #[cfg(all(target_os = "android", target_arch = "aarch64"))]
17 use std::ptr::addr_of_mut;
18 use std::slice;
19 use std::sync::Arc;
20 use std::u32;
21
22 use base::error;
23 use base::ioctl;
24 use base::ioctl_with_mut_ptr;
25 use base::ioctl_with_mut_ref;
26 use base::ioctl_with_ptr;
27 use base::ioctl_with_ref;
28 use base::ioctl_with_val;
29 use base::warn;
30 use base::AsRawDescriptor;
31 use base::Error;
32 use base::Event;
33 use base::FromRawDescriptor;
34 use base::RawDescriptor;
35 use base::SafeDescriptor;
36 use cfg_if::cfg_if;
37 use data_model::vec_with_array_field;
38 use hypervisor::DeviceKind;
39 use hypervisor::Vm;
40 use once_cell::sync::OnceCell;
41 use rand::seq::index::sample;
42 use rand::thread_rng;
43 use remain::sorted;
44 use resources::address_allocator::AddressAllocator;
45 use resources::AddressRange;
46 use resources::Alloc;
47 use resources::Error as ResourcesError;
48 use sync::Mutex;
49 use thiserror::Error;
50 use vfio_sys::vfio::vfio_acpi_dsm;
51 use vfio_sys::vfio::VFIO_IRQ_SET_DATA_BOOL;
52 use vfio_sys::*;
53 use zerocopy::AsBytes;
54 use zerocopy::FromBytes;
55
56 use crate::IommuDevType;
57
58 #[sorted]
59 #[derive(Error, Debug)]
60 pub enum VfioError {
61 #[error("failed to borrow global vfio container")]
62 BorrowVfioContainer,
63 #[error("failed to duplicate VfioContainer")]
64 ContainerDupError,
65 #[error("failed to set container's IOMMU driver type as {0:?}: {1}")]
66 ContainerSetIOMMU(IommuType, Error),
67 #[error("failed to create KVM vfio device: {0}")]
68 CreateVfioKvmDevice(Error),
69 #[error("failed to get Group Status: {0}")]
70 GetGroupStatus(Error),
71 #[error("failed to get vfio device fd: {0}")]
72 GroupGetDeviceFD(Error),
73 #[error("failed to add vfio group into vfio container: {0}")]
74 GroupSetContainer(Error),
75 #[error("group is inviable")]
76 GroupViable,
77 #[error("invalid region index: {0}")]
78 InvalidIndex(usize),
79 #[error("invalid operation")]
80 InvalidOperation,
81 #[error("invalid file path")]
82 InvalidPath,
83 #[error("failed to add guest memory map into iommu table: {0}")]
84 IommuDmaMap(Error),
85 #[error("failed to remove guest memory map from iommu table: {0}")]
86 IommuDmaUnmap(Error),
87 #[error("failed to get IOMMU cap info from host")]
88 IommuGetCapInfo,
89 #[error("failed to get IOMMU info from host: {0}")]
90 IommuGetInfo(Error),
91 #[error("failed to attach device to pKVM pvIOMMU: {0}")]
92 KvmPviommuSetConfig(Error),
93 #[error("failed to set KVM vfio device's attribute: {0}")]
94 KvmSetDeviceAttr(Error),
95 #[error("AddressAllocator is unavailable")]
96 NoRescAlloc,
97 #[error("failed to open /dev/vfio/vfio container: {0}")]
98 OpenContainer(io::Error),
99 #[error("failed to open {1} group: {0}")]
100 OpenGroup(io::Error, String),
101 #[error("failed to read {1} link: {0}")]
102 ReadLink(io::Error, PathBuf),
103 #[error("resources error: {0}")]
104 Resources(ResourcesError),
105 #[error("unknown vfio device type (flags: {0:#x})")]
106 UnknownDeviceType(u32),
107 #[error("failed to call vfio device's ACPI _DSM: {0}")]
108 VfioAcpiDsm(Error),
109 #[error("failed to disable vfio deviece's acpi notification: {0}")]
110 VfioAcpiNotificationDisable(Error),
111 #[error("failed to enable vfio deviece's acpi notification: {0}")]
112 VfioAcpiNotificationEnable(Error),
113 #[error("failed to test vfio deviece's acpi notification: {0}")]
114 VfioAcpiNotificationTest(Error),
115 #[error(
116 "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
117 )]
118 VfioApiVersion,
119 #[error("failed to get vfio device's info or info doesn't match: {0}")]
120 VfioDeviceGetInfo(Error),
121 #[error("failed to get vfio device's region info: {0}")]
122 VfioDeviceGetRegionInfo(Error),
123 #[error("container doesn't support IOMMU driver type {0:?}")]
124 VfioIommuSupport(IommuType),
125 #[error("failed to disable vfio deviece's irq: {0}")]
126 VfioIrqDisable(Error),
127 #[error("failed to enable vfio deviece's irq: {0}")]
128 VfioIrqEnable(Error),
129 #[error("failed to mask vfio deviece's irq: {0}")]
130 VfioIrqMask(Error),
131 #[error("failed to unmask vfio deviece's irq: {0}")]
132 VfioIrqUnmask(Error),
133 #[error("failed to enter vfio deviece's low power state: {0}")]
134 VfioPmLowPowerEnter(Error),
135 #[error("failed to exit vfio deviece's low power state: {0}")]
136 VfioPmLowPowerExit(Error),
137 }
138
/// Local result alias: every fallible VFIO operation in this module reports `VfioError`.
type Result<T> = std::result::Result<T, VfioError>;

/// Returns the current `errno` as a `base::Error`; call immediately after a failed ioctl.
fn get_error() -> Error {
    Error::last()
}

// Process-wide KVM VFIO pseudo-device, created lazily on first use and shared by
// all groups (see `get_or_try_init` call sites below).
static KVM_VFIO_FILE: OnceCell<SafeDescriptor> = OnceCell::new();
146
/// Kind of passthrough device a `VfioDevice` represents.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VfioDeviceType {
    /// A PCI device bound to vfio-pci.
    Pci,
    /// A platform (non-PCI) device bound to vfio-platform.
    Platform,
}
152
// Operation requested of the KVM VFIO pseudo-device for a VFIO group
// (KVM_DEV_VFIO_GROUP_ADD / KVM_DEV_VFIO_GROUP_DEL).
enum KvmVfioGroupOps {
    Add,
    Delete,
}
157
/// Handle to a pKVM pvIOMMU instance obtained from the KVM VFIO pseudo-device.
/// The wrapped file descriptor identifies the pvIOMMU to the hypervisor.
#[derive(Debug)]
pub struct KvmVfioPviommu {
    file: File,
}
162
impl KvmVfioPviommu {
    /// Creates a new pvIOMMU instance for `vm` via the KVM VFIO pseudo-device.
    ///
    /// Only implemented on Android/aarch64 (pKVM); other targets panic with
    /// `unimplemented!`.
    pub fn new(vm: &impl Vm) -> Result<Self> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let file = Self::ioctl_kvm_dev_vfio_pviommu_attach(vm)?;

                Ok(Self { file })
            } else {
                let _ = vm;
                unimplemented!()
            }
        }
    }

    /// Routes the device's `sid_idx`-th stream ID to virtual stream ID `vsid` on
    /// this pvIOMMU. Only implemented on Android/aarch64 (pKVM).
    pub fn attach<T: AsRawDescriptor>(&self, device: &T, sid_idx: u32, vsid: u32) -> Result<()> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                self.ioctl_kvm_pviommu_set_config(device, sid_idx, vsid)
            } else {
                let _ = device;
                let _ = sid_idx;
                let _ = vsid;
                unimplemented!()
            }
        }
    }

    /// Returns the identifier guests use to name this pvIOMMU.
    pub fn id(&self) -> u32 {
        let fd = self.as_raw_descriptor();
        // Guests identify pvIOMMUs to the hypervisor using the corresponding VMM FDs.
        fd.try_into().unwrap()
    }

    /// Queries how many stream IDs (SIDs) `device` exposes according to the
    /// hypervisor. Only implemented on Android/aarch64 (pKVM).
    pub fn get_sid_count<T: AsRawDescriptor>(vm: &impl Vm, device: &T) -> Result<u32> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let info = Self::ioctl_kvm_dev_vfio_pviommu_get_info(vm, device)?;

                Ok(info.nr_sids)
            } else {
                let _ = vm;
                let _ = device;
                unimplemented!()
            }
        }
    }

    // Issues KVM_DEV_VFIO_PVIOMMU_ATTACH on the (lazily created) KVM VFIO device.
    // On success the kernel returns a new FD representing the pvIOMMU instance.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_attach(vm: &impl Vm) -> Result<File> {
        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_ATTACH as u64,
            addr: 0,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret = unsafe {
            ioctl_with_ref(
                kvm_vfio_file,
                kvm_sys::KVM_SET_DEVICE_ATTR(),
                &vfio_dev_attr,
            )
        };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            // SAFETY:
            // Safe as we verify the return value.
            Ok(unsafe { File::from_raw_descriptor(ret) })
        }
    }

    // Issues KVM_PVIOMMU_SET_CONFIG on this pvIOMMU FD to bind the device's
    // sid_idx-th physical stream ID to the virtual stream ID vsid.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_pviommu_set_config<T: AsRawDescriptor>(
        &self,
        device: &T,
        sid_idx: u32,
        vsid: u32,
    ) -> Result<()> {
        let config = kvm_sys::kvm_vfio_iommu_config {
            device_fd: device.as_raw_descriptor(),
            sid_idx,
            vsid,
        };

        // SAFETY:
        // Safe as we are the owner of device and config which are valid, and we verify the return
        // value.
        let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_PVIOMMU_SET_CONFIG, &config) };

        if ret < 0 {
            Err(VfioError::KvmPviommuSetConfig(get_error()))
        } else {
            Ok(())
        }
    }

    // Issues KVM_DEV_VFIO_PVIOMMU_GET_INFO on the KVM VFIO device; the kernel
    // fills `info` (notably nr_sids) for the device identified by device_fd.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_get_info<T: AsRawDescriptor>(
        vm: &impl Vm,
        device: &T,
    ) -> Result<kvm_sys::kvm_vfio_iommu_info> {
        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;

        let mut info = kvm_sys::kvm_vfio_iommu_info {
            device_fd: device.as_raw_descriptor(),
            nr_sids: 0,
        };

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_GET_INFO as u64,
            // The kernel writes its answer back through this user pointer.
            addr: addr_of_mut!(info) as usize as u64,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret = unsafe {
            ioctl_with_ref(
                kvm_vfio_file,
                kvm_sys::KVM_SET_DEVICE_ATTR(),
                &vfio_dev_attr,
            )
        };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            Ok(info)
        }
    }
}
304
impl AsRawDescriptor for KvmVfioPviommu {
    // Exposes the underlying pvIOMMU FD (also used as the guest-visible id).
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.file.as_raw_descriptor()
    }
}
310
/// IOMMU driver type selected on a VFIO container via VFIO_SET_IOMMU.
/// Discriminants are the kernel's VFIO extension ids.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
    PkvmPviommu = VFIO_PKVM_PVIOMMU,
    // ChromeOS specific vfio_iommu_type1 implementation that is optimized for
    // small, dynamic mappings. For clients which create large, relatively
    // static mappings, Type1V2 is still preferred.
    //
    // See crrev.com/c/3593528 for the implementation.
    Type1ChromeOS = 100001,
}
323
/// VfioContainer contain multi VfioGroup, and delegate an IOMMU domain table
pub struct VfioContainer {
    // Open handle to /dev/vfio/vfio (or a duplicated container FD).
    container: File,
    // Groups attached to this container, keyed by VFIO group number.
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
    // Driver selected by set_iommu_checked(); None until the first group is added.
    iommu_type: Option<IommuType>,
}
330
extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T> where T: FromBytes,331 fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T>
332 where
333 T: FromBytes,
334 {
335 bytes.get(offset..).and_then(T::read_from_prefix)
336 }
337
338 const VFIO_API_VERSION: u8 = 0;
339 impl VfioContainer {
new() -> Result<Self>340 pub fn new() -> Result<Self> {
341 let container = OpenOptions::new()
342 .read(true)
343 .write(true)
344 .open("/dev/vfio/vfio")
345 .map_err(VfioError::OpenContainer)?;
346
347 Self::new_from_container(container)
348 }
349
    // Construct a VfioContainer from an existing container file, verifying the
    // kernel's VFIO API version before use.
    pub fn new_from_container(container: File) -> Result<Self> {
        // SAFETY:
        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION()) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
            iommu_type: None,
        })
    }
365
is_group_set(&self, group_id: u32) -> bool366 fn is_group_set(&self, group_id: u32) -> bool {
367 self.groups.get(&group_id).is_some()
368 }
369
check_extension(&self, val: IommuType) -> bool370 fn check_extension(&self, val: IommuType) -> bool {
371 // SAFETY:
372 // Safe as file is vfio container and make sure val is valid.
373 let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION(), val as c_ulong) };
374 ret != 0
375 }
376
set_iommu(&mut self, val: IommuType) -> i32377 fn set_iommu(&mut self, val: IommuType) -> i32 {
378 // SAFETY:
379 // Safe as file is vfio container and make sure val is valid.
380 unsafe { ioctl_with_val(self, VFIO_SET_IOMMU(), val as c_ulong) }
381 }
382
set_iommu_checked(&mut self, val: IommuType) -> Result<()>383 fn set_iommu_checked(&mut self, val: IommuType) -> Result<()> {
384 if !self.check_extension(val) {
385 Err(VfioError::VfioIommuSupport(val))
386 } else if self.set_iommu(val) != 0 {
387 Err(VfioError::ContainerSetIOMMU(val, get_error()))
388 } else {
389 self.iommu_type = Some(val);
390 Ok(())
391 }
392 }
393
    /// Maps `size` bytes at host address `user_addr` into the device IOVA space
    /// at `iova`, optionally writable.
    ///
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        // Dispatch on the driver chosen by set_iommu_checked(); pKVM pvIOMMU has
        // no userspace-managed DMA mapping, so the operation is rejected there.
        match self
            .iommu_type
            .expect("vfio_dma_map called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_map(iova, size, user_addr, write_en)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }
414
    /// Type1 backend of `vfio_dma_map`: issues VFIO_IOMMU_MAP_DMA. Mappings are
    /// always readable; `write_en` additionally requests write access.
    ///
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    unsafe fn vfio_iommu_type1_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA(), &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }
444
vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()>445 pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
446 match self
447 .iommu_type
448 .expect("vfio_dma_unmap called before configuring IOMMU")
449 {
450 IommuType::Type1V2 | IommuType::Type1ChromeOS => {
451 self.vfio_iommu_type1_dma_unmap(iova, size)
452 }
453 IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
454 }
455 }
456
vfio_iommu_type1_dma_unmap(&self, iova: u64, size: u64) -> Result<()>457 fn vfio_iommu_type1_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
458 let mut dma_unmap = vfio_iommu_type1_dma_unmap {
459 argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
460 flags: 0,
461 iova,
462 size,
463 ..Default::default()
464 };
465
466 // SAFETY:
467 // Safe as file is vfio container, dma_unmap is constructed by us, and
468 // we check the return value
469 let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA(), &mut dma_unmap) };
470 if ret != 0 || dma_unmap.size != size {
471 return Err(VfioError::IommuDmaUnmap(get_error()));
472 }
473
474 Ok(())
475 }
476
vfio_get_iommu_page_size_mask(&self) -> Result<u64>477 pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
478 match self
479 .iommu_type
480 .expect("vfio_get_iommu_page_size_mask called before configuring IOMMU")
481 {
482 IommuType::Type1V2 | IommuType::Type1ChromeOS => {
483 self.vfio_iommu_type1_get_iommu_page_size_mask()
484 }
485 IommuType::PkvmPviommu => Ok(0),
486 }
487 }
488
vfio_iommu_type1_get_iommu_page_size_mask(&self) -> Result<u64>489 fn vfio_iommu_type1_get_iommu_page_size_mask(&self) -> Result<u64> {
490 let mut iommu_info = vfio_iommu_type1_info {
491 argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
492 flags: 0,
493 iova_pgsizes: 0,
494 ..Default::default()
495 };
496
497 // SAFETY:
498 // Safe as file is vfio container, iommu_info has valid values,
499 // and we check the return value
500 let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO(), &mut iommu_info) };
501 if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
502 return Err(VfioError::IommuGetInfo(get_error()));
503 }
504
505 Ok(iommu_info.iova_pgsizes)
506 }
507
vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<AddressRange>>508 pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
509 match self
510 .iommu_type
511 .expect("vfio_iommu_iova_get_iova_ranges called before configuring IOMMU")
512 {
513 IommuType::Type1V2 | IommuType::Type1ChromeOS => {
514 self.vfio_iommu_type1_get_iova_ranges()
515 }
516 IommuType::PkvmPviommu => Ok(Vec::new()),
517 }
518 }
519
vfio_iommu_type1_get_iova_ranges(&self) -> Result<Vec<AddressRange>>520 fn vfio_iommu_type1_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
521 // Query the buffer size needed fetch the capabilities.
522 let mut iommu_info_argsz = vfio_iommu_type1_info {
523 argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
524 flags: 0,
525 iova_pgsizes: 0,
526 ..Default::default()
527 };
528
529 // SAFETY:
530 // Safe as file is vfio container, iommu_info_argsz has valid values,
531 // and we check the return value
532 let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO(), &mut iommu_info_argsz) };
533 if ret != 0 {
534 return Err(VfioError::IommuGetInfo(get_error()));
535 }
536
537 if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
538 return Err(VfioError::IommuGetCapInfo);
539 }
540
541 let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
542 iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
543 );
544 iommu_info[0].argsz = iommu_info_argsz.argsz;
545 let ret =
546 // SAFETY:
547 // Safe as file is vfio container, iommu_info has valid values,
548 // and we check the return value
549 unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO(), iommu_info.as_mut_ptr()) };
550 if ret != 0 {
551 return Err(VfioError::IommuGetInfo(get_error()));
552 }
553
554 // SAFETY:
555 // Safe because we initialized iommu_info with enough space, u8 has less strict
556 // alignment, and since it will no longer be mutated.
557 let info_bytes = unsafe {
558 std::slice::from_raw_parts(
559 iommu_info.as_ptr() as *const u8,
560 iommu_info_argsz.argsz as usize,
561 )
562 };
563
564 if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
565 return Err(VfioError::IommuGetCapInfo);
566 }
567
568 let mut offset = iommu_info[0].cap_offset as usize;
569 while offset != 0 {
570 let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset)
571 .ok_or(VfioError::IommuGetCapInfo)?;
572
573 if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
574 let iova_header =
575 extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
576 info_bytes, offset,
577 )
578 .ok_or(VfioError::IommuGetCapInfo)?;
579 let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
580 let mut ret = Vec::new();
581 for i in 0..iova_header.nr_iovas {
582 ret.push(
583 extract_vfio_struct::<vfio_iova_range>(
584 info_bytes,
585 range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
586 )
587 .ok_or(VfioError::IommuGetCapInfo)?,
588 );
589 }
590 return Ok(ret
591 .iter()
592 .map(|range| AddressRange {
593 start: range.start,
594 end: range.end,
595 })
596 .collect());
597 }
598 offset = header.next as usize;
599 }
600
601 Err(VfioError::IommuGetCapInfo)
602 }
603
set_iommu_from(&mut self, iommu_dev: IommuDevType) -> Result<()>604 fn set_iommu_from(&mut self, iommu_dev: IommuDevType) -> Result<()> {
605 match iommu_dev {
606 IommuDevType::CoIommu | IommuDevType::VirtioIommu => {
607 // If we expect granular, dynamic mappings, try the ChromeOS Type1ChromeOS first,
608 // then fall back to upstream versions.
609 self.set_iommu_checked(IommuType::Type1ChromeOS)
610 .or_else(|_| self.set_iommu_checked(IommuType::Type1V2))
611 }
612 IommuDevType::NoIommu => self.set_iommu_checked(IommuType::Type1V2),
613 IommuDevType::PkvmPviommu => self.set_iommu_checked(IommuType::PkvmPviommu),
614 }
615 }
616
get_group_with_vm( &mut self, id: u32, vm: &impl Vm, iommu_dev: IommuDevType, ) -> Result<Arc<Mutex<VfioGroup>>>617 fn get_group_with_vm(
618 &mut self,
619 id: u32,
620 vm: &impl Vm,
621 iommu_dev: IommuDevType,
622 ) -> Result<Arc<Mutex<VfioGroup>>> {
623 if let Some(group) = self.groups.get(&id) {
624 return Ok(group.clone());
625 }
626
627 let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
628 if self.groups.is_empty() {
629 self.set_iommu_from(iommu_dev)?;
630 // Before the first group is added into container, do once per container
631 // initialization. Both coiommu and virtio-iommu rely on small, dynamic
632 // mappings. However, if an iommu is not enabled, then we map the entirety
633 // of guest memory as a small number of large, static mappings.
634 match iommu_dev {
635 IommuDevType::CoIommu | IommuDevType::PkvmPviommu | IommuDevType::VirtioIommu => {}
636 IommuDevType::NoIommu => {
637 for region in vm.get_memory().regions() {
638 // SAFETY:
639 // Safe because the guest regions are guaranteed not to overlap
640 unsafe {
641 self.vfio_dma_map(
642 region.guest_addr.0,
643 region.size as u64,
644 region.host_addr as u64,
645 true,
646 )
647 }?;
648 }
649 }
650 }
651 }
652
653 let kvm_vfio_file = KVM_VFIO_FILE
654 .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
655 .map_err(VfioError::CreateVfioKvmDevice)?;
656 group
657 .lock()
658 .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;
659
660 self.groups.insert(id, group.clone());
661
662 Ok(group)
663 }
664
get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>>665 fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
666 if let Some(group) = self.groups.get(&id) {
667 return Ok(group.clone());
668 }
669
670 let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
671
672 if self.groups.is_empty() {
673 // Before the first group is added into container, do once per
674 // container initialization.
675 self.set_iommu_checked(IommuType::Type1V2)?;
676 }
677
678 self.groups.insert(id, group.clone());
679 Ok(group)
680 }
681
remove_group(&mut self, id: u32, reduce: bool)682 fn remove_group(&mut self, id: u32, reduce: bool) {
683 let mut remove = false;
684
685 if let Some(group) = self.groups.get(&id) {
686 if reduce {
687 group.lock().reduce_device_num();
688 }
689 if group.lock().device_num() == 0 {
690 let kvm_vfio_file = KVM_VFIO_FILE.get().expect("kvm vfio file isn't created");
691 if group
692 .lock()
693 .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
694 .is_err()
695 {
696 warn!("failing in remove vfio group from kvm device");
697 }
698 remove = true;
699 }
700 }
701
702 if remove {
703 self.groups.remove(&id);
704 }
705 }
706
clone_as_raw_descriptor(&self) -> Result<RawDescriptor>707 pub fn clone_as_raw_descriptor(&self) -> Result<RawDescriptor> {
708 // SAFETY: this call is safe because it doesn't modify any memory and we
709 // check the return value.
710 let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
711 if raw_descriptor < 0 {
712 Err(VfioError::ContainerDupError)
713 } else {
714 Ok(raw_descriptor)
715 }
716 }
717
    // Gets group ids for all groups in the container (order unspecified).
    pub fn group_ids(&self) -> Vec<&u32> {
        self.groups.keys().collect()
    }
722 }
723
impl AsRawDescriptor for VfioContainer {
    // Exposes the container FD so it can be used directly in ioctls.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}
729
// One VFIO group (/dev/vfio/<id>) attached to a container, with a count of the
// VfioDevices currently opened through it.
struct VfioGroup {
    group: File,
    device_num: u32,
}
734
735 impl VfioGroup {
new(container: &VfioContainer, id: u32) -> Result<Self>736 fn new(container: &VfioContainer, id: u32) -> Result<Self> {
737 let group_path = format!("/dev/vfio/{}", id);
738 let group_file = OpenOptions::new()
739 .read(true)
740 .write(true)
741 .open(Path::new(&group_path))
742 .map_err(|e| VfioError::OpenGroup(e, group_path))?;
743
744 let mut group_status = vfio_group_status {
745 argsz: mem::size_of::<vfio_group_status>() as u32,
746 flags: 0,
747 };
748 let mut ret =
749 // SAFETY:
750 // Safe as we are the owner of group_file and group_status which are valid value.
751 unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS(), &mut group_status) };
752 if ret < 0 {
753 return Err(VfioError::GetGroupStatus(get_error()));
754 }
755
756 if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
757 return Err(VfioError::GroupViable);
758 }
759
760 let container_raw_descriptor = container.as_raw_descriptor();
761 // SAFETY:
762 // Safe as we are the owner of group_file and container_raw_descriptor which are valid
763 // value, and we verify the ret value
764 ret = unsafe {
765 ioctl_with_ref(
766 &group_file,
767 VFIO_GROUP_SET_CONTAINER(),
768 &container_raw_descriptor,
769 )
770 };
771 if ret < 0 {
772 return Err(VfioError::GroupSetContainer(get_error()));
773 }
774
775 Ok(VfioGroup {
776 group: group_file,
777 device_num: 0,
778 })
779 }
780
get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32>781 fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
782 let mut uuid_path = PathBuf::new();
783 uuid_path.push(sysfspath);
784 uuid_path.push("iommu_group");
785 let group_path = uuid_path
786 .read_link()
787 .map_err(|e| VfioError::ReadLink(e, uuid_path))?;
788 let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
789 let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
790 let group_id = group_str
791 .parse::<u32>()
792 .map_err(|_| VfioError::InvalidPath)?;
793
794 Ok(group_id)
795 }
796
kvm_device_set_group( &self, kvm_vfio_file: &SafeDescriptor, ops: KvmVfioGroupOps, ) -> Result<()>797 fn kvm_device_set_group(
798 &self,
799 kvm_vfio_file: &SafeDescriptor,
800 ops: KvmVfioGroupOps,
801 ) -> Result<()> {
802 let group_descriptor = self.as_raw_descriptor();
803 let group_descriptor_ptr = &group_descriptor as *const i32;
804 let vfio_dev_attr = match ops {
805 KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
806 flags: 0,
807 group: kvm_sys::KVM_DEV_VFIO_GROUP,
808 attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
809 addr: group_descriptor_ptr as u64,
810 },
811 KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
812 flags: 0,
813 group: kvm_sys::KVM_DEV_VFIO_GROUP,
814 attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
815 addr: group_descriptor_ptr as u64,
816 },
817 };
818
819 // SAFETY:
820 // Safe as we are the owner of vfio_dev_descriptor and vfio_dev_attr which are valid value,
821 // and we verify the return value.
822 if 0 != unsafe {
823 ioctl_with_ref(
824 kvm_vfio_file,
825 kvm_sys::KVM_SET_DEVICE_ATTR(),
826 &vfio_dev_attr,
827 )
828 } {
829 return Err(VfioError::KvmSetDeviceAttr(get_error()));
830 }
831
832 Ok(())
833 }
834
get_device(&self, name: &str) -> Result<File>835 fn get_device(&self, name: &str) -> Result<File> {
836 let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
837 let path_ptr = path.as_ptr();
838
839 // SAFETY:
840 // Safe as we are the owner of self and path_ptr which are valid value.
841 let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD(), path_ptr) };
842 if ret < 0 {
843 return Err(VfioError::GroupGetDeviceFD(get_error()));
844 }
845
846 // SAFETY:
847 // Safe as ret is valid descriptor
848 Ok(unsafe { File::from_raw_descriptor(ret) })
849 }
850
add_device_num(&mut self)851 fn add_device_num(&mut self) {
852 self.device_num += 1;
853 }
854
reduce_device_num(&mut self)855 fn reduce_device_num(&mut self) {
856 self.device_num -= 1;
857 }
858
device_num(&self) -> u32859 fn device_num(&self) -> u32 {
860 self.device_num
861 }
862 }
863
impl AsRawDescriptor for VfioGroup {
    // Exposes the group FD so it can be used directly in ioctls.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}
869
/// A helper trait for managing VFIO setup
pub trait VfioCommonTrait: Send + Sync {
    /// The single place to create a VFIO container for a PCI endpoint.
    ///
    /// The policy to determine whether an individual or a shared VFIO container
    /// will be created for this device is governed by the physical PCI topology,
    /// and the argument iommu_dev.
    ///
    /// # Arguments
    ///
    /// * `iommu_dev` - the type of IOMMU this device sits behind (if any)
    /// * `sysfspath` - the path to the PCI device, e.g. /sys/bus/pci/devices/0000:02:00.0;
    ///   only required for `IommuDevType::VirtioIommu`
    fn vfio_get_container<P: AsRef<Path>>(
        iommu_dev: IommuDevType,
        sysfspath: Option<P>,
    ) -> Result<Arc<Mutex<VfioContainer>>>;
}
887
// Per-thread caches of shared VFIO containers, one per IOMMU mode.
thread_local! {

    // One VFIO container is shared by all VFIO devices that don't
    // attach to the virtio IOMMU device
    static NO_IOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);

    // For IOMMU enabled devices, all VFIO groups that share the same IOVA space
    // are managed by one VFIO container
    static IOMMU_CONTAINERS: RefCell<Option<Vec<Arc<Mutex<VfioContainer>>>>> = RefCell::new(Some(Default::default()));

    // One VFIO container is shared by all VFIO devices that
    // attach to the CoIOMMU device
    static COIOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);

    // One VFIO container is shared by all VFIO devices that attach to pKVM
    static PKVM_IOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);
}
905
906 pub struct VfioCommonSetup;
907
908 impl VfioCommonTrait for VfioCommonSetup {
vfio_get_container<P: AsRef<Path>>( iommu_dev: IommuDevType, sysfspath: Option<P>, ) -> Result<Arc<Mutex<VfioContainer>>>909 fn vfio_get_container<P: AsRef<Path>>(
910 iommu_dev: IommuDevType,
911 sysfspath: Option<P>,
912 ) -> Result<Arc<Mutex<VfioContainer>>> {
913 match iommu_dev {
914 IommuDevType::NoIommu => {
915 // One VFIO container is used for all IOMMU disabled groups
916 NO_IOMMU_CONTAINER.with(|v| {
917 if v.borrow().is_some() {
918 if let Some(ref container) = *v.borrow() {
919 Ok(container.clone())
920 } else {
921 Err(VfioError::BorrowVfioContainer)
922 }
923 } else {
924 let container = Arc::new(Mutex::new(VfioContainer::new()?));
925 *v.borrow_mut() = Some(container.clone());
926 Ok(container)
927 }
928 })
929 }
930 IommuDevType::VirtioIommu => {
931 let path = sysfspath.ok_or(VfioError::InvalidPath)?;
932 let group_id = VfioGroup::get_group_id(path)?;
933
934 // One VFIO container is used for all devices belong to one VFIO group
935 // NOTE: vfio_wrapper relies on each container containing exactly one group.
936 IOMMU_CONTAINERS.with(|v| {
937 if let Some(ref mut containers) = *v.borrow_mut() {
938 let container = containers
939 .iter()
940 .find(|container| container.lock().is_group_set(group_id));
941
942 match container {
943 None => {
944 let container = Arc::new(Mutex::new(VfioContainer::new()?));
945 containers.push(container.clone());
946 Ok(container)
947 }
948 Some(container) => Ok(container.clone()),
949 }
950 } else {
951 Err(VfioError::BorrowVfioContainer)
952 }
953 })
954 }
955 IommuDevType::CoIommu => {
956 // One VFIO container is used for devices attached to CoIommu
957 COIOMMU_CONTAINER.with(|v| {
958 if v.borrow().is_some() {
959 if let Some(ref container) = *v.borrow() {
960 Ok(container.clone())
961 } else {
962 Err(VfioError::BorrowVfioContainer)
963 }
964 } else {
965 let container = Arc::new(Mutex::new(VfioContainer::new()?));
966 *v.borrow_mut() = Some(container.clone());
967 Ok(container)
968 }
969 })
970 }
971 IommuDevType::PkvmPviommu => {
972 // One VFIO container is used for devices attached to pKVM
973 PKVM_IOMMU_CONTAINER.with(|v| {
974 if v.borrow().is_some() {
975 if let Some(ref container) = *v.borrow() {
976 Ok(container.clone())
977 } else {
978 Err(VfioError::BorrowVfioContainer)
979 }
980 } else {
981 let container = Arc::new(Mutex::new(VfioContainer::new()?));
982 *v.borrow_mut() = Some(container.clone());
983 Ok(container)
984 }
985 })
986 }
987 }
988 }
989 }
990
/// Vfio Irq type used to enable/disable/mask/unmask vfio irq
pub enum VfioIrqType {
    /// Legacy INTx pin interrupt.
    Intx,
    /// Message Signaled Interrupt.
    Msi,
    /// Extended Message Signaled Interrupt (MSI-X).
    Msix,
}
997
/// Vfio Irq information used to assign and enable/disable/mask/unmask vfio irq
pub struct VfioIrq {
    /// IRQ flags as reported in `vfio_irq_info.flags` by VFIO_DEVICE_GET_IRQ_INFO.
    pub flags: u32,
    /// IRQ index within the device (`vfio_irq_info.index`).
    pub index: u32,
}
1003
/// Address on VFIO memory region.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    /// region number (index into the device's region list).
    pub index: usize,
    /// byte offset within the region.
    pub addr: u64,
}
1012
/// A single VFIO device region, populated from VFIO_DEVICE_GET_REGION_INFO
/// (see `VfioDevice::get_regions`).
#[derive(Debug)]
pub struct VfioRegion {
    // flags for this region: read/write/mmap (VFIO_REGION_INFO_FLAG_*)
    flags: u32,
    // region size in bytes
    size: u64,
    // region offset used to read/write with vfio device descriptor
    offset: u64,
    // vectors for mmap offset and size
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // type and subtype for cap type
    cap_info: Option<(u32, u32)>,
}
1025
/// Vfio device for exposing regions which could be read/write to kernel vfio device.
pub struct VfioDevice {
    // File wrapping the VFIO device descriptor obtained from the group.
    dev: File,
    // Device name: the final sysfs path component (for PCI, BUS:DEVICE.FUNCTION).
    name: String,
    // Container that owns this device's IOMMU group.
    container: Arc<Mutex<VfioContainer>>,
    // Pci or Platform, detected from VFIO_DEVICE_GET_INFO flags.
    dev_type: VfioDeviceType,
    // Raw descriptor of the VFIO group this device belongs to.
    group_descriptor: RawDescriptor,
    // IOMMU group id parsed from the device's sysfs path.
    group_id: u32,
    // vec for vfio device's regions
    regions: Vec<VfioRegion>,
    // Number of IRQ indexes reported by VFIO_DEVICE_GET_INFO.
    num_irqs: u32,

    // Allocator over the container's usable IOVA ranges.
    iova_alloc: Arc<Mutex<AddressAllocator>>,
    // Device-tree symbol (node label) supplied at creation, if any.
    dt_symbol: Option<String>,
    // pKVM pvIOMMU handle and the vSIDs assigned to it (PkvmPviommu only).
    pviommu: Option<(Arc<Mutex<KvmVfioPviommu>>, Vec<u32>)>,
}
1042
1043 impl VfioDevice {
    /// Create a new vfio device, then guest read/write on this device could be
    /// transfered into kernel vfio.
    /// sysfspath specify the vfio device path in sys file system.
    ///
    /// * `sysfspath` - vfio device path; its final component becomes the device name.
    /// * `vm` - hypervisor VM handle used for group/pvIOMMU registration.
    /// * `container` - VFIO container the device's group is registered with.
    /// * `iommu_dev` - the IOMMU flavor backing the device.
    /// * `dt_symbol` - optional device-tree node label for the device.
    pub fn new_passthrough<P: AsRef<Path>>(
        sysfspath: &P,
        vm: &impl Vm,
        container: Arc<Mutex<VfioContainer>>,
        iommu_dev: IommuDevType,
        dt_symbol: Option<String>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;

        let group = container
            .lock()
            .get_group_with_vm(group_id, vm, iommu_dev)?;
        // The device name is the last path component of the sysfs path.
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.lock().get_device(&name)?;
        let (dev_info, dev_type) = Self::get_device_info(&dev)?;
        let regions = Self::get_regions(&dev, dev_info.num_regions)?;
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        // Build an IOVA allocator over the ranges the container's IOMMU can map.
        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        let pviommu = if matches!(iommu_dev, IommuDevType::PkvmPviommu) {
            // We currently have a 1-to-1 mapping between pvIOMMUs and VFIO devices.
            let pviommu = KvmVfioPviommu::new(vm)?;

            // Pick one random, distinct vSID per stream ID of the device
            // (`sample` draws without replacement) and attach each in turn.
            let vsids_len = KvmVfioPviommu::get_sid_count(vm, &dev)?.try_into().unwrap();
            let max_vsid = u32::MAX.try_into().unwrap();
            let random_vsids = sample(&mut thread_rng(), max_vsid, vsids_len).into_iter();
            let vsids = Vec::from_iter(random_vsids.map(|v| u32::try_from(v).unwrap()));
            for (i, vsid) in vsids.iter().enumerate() {
                pviommu.attach(&dev, i.try_into().unwrap(), *vsid)?;
            }

            Some((Arc::new(Mutex::new(pviommu)), vsids))
        } else {
            None
        };

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol,
            pviommu,
        })
    }
1106
new<P: AsRef<Path>>( sysfspath: &P, container: Arc<Mutex<VfioContainer>>, ) -> Result<Self>1107 pub fn new<P: AsRef<Path>>(
1108 sysfspath: &P,
1109 container: Arc<Mutex<VfioContainer>>,
1110 ) -> Result<Self> {
1111 let group_id = VfioGroup::get_group_id(sysfspath)?;
1112 let group = container.lock().get_group(group_id)?;
1113 let name_osstr = sysfspath
1114 .as_ref()
1115 .file_name()
1116 .ok_or(VfioError::InvalidPath)?;
1117 let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
1118 let name = String::from(name_str);
1119
1120 let dev = match group.lock().get_device(&name) {
1121 Ok(dev) => dev,
1122 Err(e) => {
1123 container.lock().remove_group(group_id, false);
1124 return Err(e);
1125 }
1126 };
1127 let (dev_info, dev_type) = match Self::get_device_info(&dev) {
1128 Ok(dev_info) => dev_info,
1129 Err(e) => {
1130 container.lock().remove_group(group_id, false);
1131 return Err(e);
1132 }
1133 };
1134 let regions = match Self::get_regions(&dev, dev_info.num_regions) {
1135 Ok(regions) => regions,
1136 Err(e) => {
1137 container.lock().remove_group(group_id, false);
1138 return Err(e);
1139 }
1140 };
1141 group.lock().add_device_num();
1142 let group_descriptor = group.lock().as_raw_descriptor();
1143
1144 let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
1145 let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
1146 .map_err(VfioError::Resources)?;
1147
1148 Ok(VfioDevice {
1149 dev,
1150 name,
1151 container,
1152 dev_type,
1153 group_descriptor,
1154 group_id,
1155 regions,
1156 num_irqs: dev_info.num_irqs,
1157 iova_alloc: Arc::new(Mutex::new(iova_alloc)),
1158 dt_symbol: None,
1159 pviommu: None,
1160 })
1161 }
1162
    /// Returns the `File` wrapping this device's VFIO descriptor.
    pub fn dev_file(&self) -> &File {
        &self.dev
    }
1167
    /// Returns the device name — the final component of its sysfs path.
    /// For PCI devices this is the BUS:DEVICE.FUNCTION string.
    pub fn device_name(&self) -> &String {
        &self.name
    }
1172
    /// Returns the type of this VFIO device (PCI or platform), as detected
    /// from the kernel's device-info flags at creation time.
    pub fn device_type(&self) -> VfioDeviceType {
        self.dev_type
    }
1177
    /// Returns the DT symbol (node label) of this VFIO device, if one was
    /// supplied at creation.
    pub fn dt_symbol(&self) -> Option<&str> {
        self.dt_symbol.as_deref()
    }
1182
1183 /// Returns the type and indentifier (if applicable) of the IOMMU used by this VFIO device and
1184 /// its master IDs.
iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])>1185 pub fn iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])> {
1186 // We currently only report IommuDevType::PkvmPviommu.
1187 if let Some((ref pviommu, ref ids)) = self.pviommu {
1188 Some((
1189 IommuDevType::PkvmPviommu,
1190 Some(pviommu.lock().id()),
1191 ids.as_ref(),
1192 ))
1193 } else {
1194 None
1195 }
1196 }
1197
1198 /// enter the device's low power state
pm_low_power_enter(&self) -> Result<()>1199 pub fn pm_low_power_enter(&self) -> Result<()> {
1200 let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
1201 device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
1202 device_feature[0].flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY;
1203 // SAFETY:
1204 // Safe as we are the owner of self and power_management which are valid value
1205 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE(), &device_feature[0]) };
1206 if ret < 0 {
1207 Err(VfioError::VfioPmLowPowerEnter(get_error()))
1208 } else {
1209 Ok(())
1210 }
1211 }
1212
    /// enter the device's low power state with wakeup notification
    ///
    /// `wakeup_evt` is passed to the kernel as an eventfd; its descriptor is
    /// embedded in the feature payload (LOW_POWER_ENTRY_WITH_WAKEUP).
    pub fn pm_low_power_enter_with_wakeup(&self, wakeup_evt: Event) -> Result<()> {
        let payload = vfio_device_low_power_entry_with_wakeup {
            wakeup_eventfd: wakeup_evt.as_raw_descriptor(),
            reserved: 0,
        };
        let payload_size = mem::size_of::<vfio_device_low_power_entry_with_wakeup>();
        // Allocate the variable-length feature struct with room for the payload;
        // argsz covers the fixed header plus the trailing payload bytes.
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(payload_size);
        device_feature[0].argsz = (mem::size_of::<vfio_device_feature>() + payload_size) as u32;
        device_feature[0].flags =
            VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
        // SAFETY:
        // Safe as we know vfio_device_low_power_entry_with_wakeup has two 32-bit int fields
        unsafe {
            device_feature[0]
                .data
                .as_mut_slice(payload_size)
                .copy_from_slice(
                    mem::transmute::<vfio_device_low_power_entry_with_wakeup, [u8; 8]>(payload)
                        .as_slice(),
                );
        }
        // SAFETY:
        // Safe as we are the owner of self and power_management which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE(), &device_feature[0]) };
        if ret < 0 {
            Err(VfioError::VfioPmLowPowerEnter(get_error()))
        } else {
            Ok(())
        }
    }
1244
1245 /// exit the device's low power state
pm_low_power_exit(&self) -> Result<()>1246 pub fn pm_low_power_exit(&self) -> Result<()> {
1247 let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
1248 device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
1249 device_feature[0].flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT;
1250 // SAFETY:
1251 // Safe as we are the owner of self and power_management which are valid value
1252 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE(), &device_feature[0]) };
1253 if ret < 0 {
1254 Err(VfioError::VfioPmLowPowerExit(get_error()))
1255 } else {
1256 Ok(())
1257 }
1258 }
1259
    /// call _DSM from the device's ACPI table
    ///
    /// `args` is the raw _DSM argument buffer passed through to the kernel;
    /// on success the (possibly kernel-updated) buffer contents are returned.
    pub fn acpi_dsm(&self, args: &[u8]) -> Result<Vec<u8>> {
        let count = args.len();
        let mut dsm = vec_with_array_field::<vfio_acpi_dsm, u8>(count);
        // argsz covers the fixed header plus the trailing argument bytes.
        dsm[0].argsz = (mem::size_of::<vfio_acpi_dsm>() + mem::size_of_val(args)) as u32;
        dsm[0].padding = 0;
        // SAFETY:
        // Safe as we allocated enough space to hold args
        unsafe {
            dsm[0].args.as_mut_slice(count).clone_from_slice(args);
        }
        // SAFETY:
        // Safe as we are the owner of self and dsm which are valid value
        let ret = unsafe { ioctl_with_mut_ref(&self.dev, VFIO_DEVICE_ACPI_DSM(), &mut dsm[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiDsm(get_error()))
        } else {
            // SAFETY:
            // Safe as we allocated enough space to hold args
            let res = unsafe { dsm[0].args.as_slice(count) };
            Ok(res.to_vec())
        }
    }
1283
    /// Enable vfio device's ACPI notifications and associate EventFD with device.
    ///
    /// * `acpi_notification_eventfd` - eventfd the kernel signals on notification.
    /// * `index` - IRQ index to attach the eventfd to.
    pub fn acpi_notification_evt_enable(
        &self,
        acpi_notification_eventfd: &Event,
        index: u32,
    ) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let count = 1;

        // Variable-length vfio_irq_set carrying one eventfd as its payload.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = count as u32;

        // SAFETY:
        // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
        let data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        data.copy_from_slice(&acpi_notification_eventfd.as_raw_descriptor().to_ne_bytes()[..]);

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationEnable(get_error()))
        } else {
            Ok(())
        }
    }
1314
1315 /// Disable vfio device's ACPI notification and disconnect EventFd with device.
acpi_notification_disable(&self, index: u32) -> Result<()>1316 pub fn acpi_notification_disable(&self, index: u32) -> Result<()> {
1317 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1318 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1319 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1320 irq_set[0].index = index;
1321 irq_set[0].start = 0;
1322 irq_set[0].count = 0;
1323
1324 // SAFETY:
1325 // Safe as we are the owner of self and irq_set which are valid value
1326 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
1327 if ret < 0 {
1328 Err(VfioError::VfioAcpiNotificationDisable(get_error()))
1329 } else {
1330 Ok(())
1331 }
1332 }
1333
1334 /// Test vfio device's ACPI notification by simulating hardware triggering.
1335 /// When the signaling mechanism is set, the VFIO_IRQ_SET_DATA_BOOL can be used with
1336 /// VFIO_IRQ_SET_ACTION_TRIGGER to perform kernel level interrupt loopback testing.
acpi_notification_test(&self, index: u32, val: u32) -> Result<()>1337 pub fn acpi_notification_test(&self, index: u32, val: u32) -> Result<()> {
1338 let u32_size = mem::size_of::<u32>();
1339 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
1340 irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + u32_size) as u32;
1341 irq_set[0].flags = VFIO_IRQ_SET_DATA_BOOL | VFIO_IRQ_SET_ACTION_TRIGGER;
1342 irq_set[0].index = index;
1343 irq_set[0].start = 0;
1344 irq_set[0].count = 1;
1345
1346 // SAFETY:
1347 // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
1348 let data = unsafe { irq_set[0].data.as_mut_slice(u32_size) };
1349 data.copy_from_slice(&val.to_ne_bytes()[..]);
1350
1351 // SAFETY:
1352 // Safe as we are the owner of self and irq_set which are valid value
1353 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
1354 if ret < 0 {
1355 Err(VfioError::VfioAcpiNotificationTest(get_error()))
1356 } else {
1357 Ok(())
1358 }
1359 }
1360
    /// Enable vfio device's irq and associate Irqfd Event with device.
    /// When MSIx is enabled, multi vectors will be supported, and vectors starting from subindex to
    /// subindex + descriptors length will be assigned with irqfd in the descriptors array.
    /// when index = VFIO_PCI_REQ_IRQ_INDEX, kernel vfio will trigger this event when physical
    /// device is removed.
    /// If descriptor is None, -1 is assigned to the irq. A value of -1 is used to either de-assign
    /// interrupts if already assigned or skip un-assigned interrupts.
    pub fn irq_enable(
        &self,
        descriptors: &[Option<&Event>],
        index: u32,
        subindex: u32,
    ) -> Result<()> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = subindex;
        irq_set[0].count = count as u32;

        // SAFETY:
        // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
        // is u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
        // together as u32. It is safe as enough space is reserved through
        // vec_with_array_field(u32)<count>.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        // Write each descriptor (or -1 for None) into consecutive 4-byte slots,
        // advancing `data` past the slot just written each iteration.
        for descriptor in descriptors.iter().take(count) {
            let (left, right) = data.split_at_mut(u32_size);
            match descriptor {
                Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
                None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
            }
            data = right;
        }

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }
1407
1408 /// When intx is enabled, irqfd is used to trigger a level interrupt into guest, resample irqfd
1409 /// is used to get guest EOI notification.
1410 /// When host hw generates interrupt, vfio irq handler in host kernel receive and handle it,
1411 /// this handler disable hw irq first, then trigger irqfd to inject interrupt into guest. When
1412 /// resample irqfd is triggered by guest EOI, vfio kernel could enable hw irq, so hw could
1413 /// generate another interrupts.
1414 /// This function enable resample irqfd and let vfio kernel could get EOI notification.
1415 ///
1416 /// descriptor: should be resample IrqFd.
resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()>1417 pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
1418 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
1419 irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
1420 irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
1421 irq_set[0].index = index;
1422 irq_set[0].start = 0;
1423 irq_set[0].count = 1;
1424
1425 {
1426 // SAFETY:
1427 // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data is
1428 // u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
1429 // together as u32. It is safe as enough space is reserved through
1430 // vec_with_array_field(u32)<1>.
1431 let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
1432 descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
1433 }
1434
1435 // SAFETY:
1436 // Safe as we are the owner of self and irq_set which are valid value
1437 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
1438 if ret < 0 {
1439 Err(VfioError::VfioIrqEnable(get_error()))
1440 } else {
1441 Ok(())
1442 }
1443 }
1444
1445 /// disable vfio device's irq and disconnect Irqfd Event with device
irq_disable(&self, index: u32) -> Result<()>1446 pub fn irq_disable(&self, index: u32) -> Result<()> {
1447 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1448 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1449 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1450 irq_set[0].index = index;
1451 irq_set[0].start = 0;
1452 irq_set[0].count = 0;
1453
1454 // SAFETY:
1455 // Safe as we are the owner of self and irq_set which are valid value
1456 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
1457 if ret < 0 {
1458 Err(VfioError::VfioIrqDisable(get_error()))
1459 } else {
1460 Ok(())
1461 }
1462 }
1463
1464 /// Unmask vfio device irq
irq_unmask(&self, index: u32) -> Result<()>1465 pub fn irq_unmask(&self, index: u32) -> Result<()> {
1466 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1467 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1468 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
1469 irq_set[0].index = index;
1470 irq_set[0].start = 0;
1471 irq_set[0].count = 1;
1472
1473 // SAFETY:
1474 // Safe as we are the owner of self and irq_set which are valid value
1475 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
1476 if ret < 0 {
1477 Err(VfioError::VfioIrqUnmask(get_error()))
1478 } else {
1479 Ok(())
1480 }
1481 }
1482
1483 /// Mask vfio device irq
irq_mask(&self, index: u32) -> Result<()>1484 pub fn irq_mask(&self, index: u32) -> Result<()> {
1485 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1486 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1487 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
1488 irq_set[0].index = index;
1489 irq_set[0].start = 0;
1490 irq_set[0].count = 1;
1491
1492 // SAFETY:
1493 // Safe as we are the owner of self and irq_set which are valid value
1494 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
1495 if ret < 0 {
1496 Err(VfioError::VfioIrqMask(get_error()))
1497 } else {
1498 Ok(())
1499 }
1500 }
1501
    /// Get and validate VFIO device information.
    ///
    /// Issues VFIO_DEVICE_GET_INFO and classifies the device as PCI or
    /// platform from the returned flags; PCI devices are checked for the
    /// minimum region/irq counts the rest of this file indexes into.
    fn get_device_info(device_file: &File) -> Result<(vfio_device_info, VfioDeviceType)> {
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as we are the owner of device_file and dev_info which are valid value,
        // and we verify the return value.
        let ret = unsafe { ioctl_with_mut_ref(device_file, VFIO_DEVICE_GET_INFO(), &mut dev_info) };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        let dev_type = if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
            if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
                || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
            {
                // NOTE(review): the ioctl succeeded here, so get_error() reads a
                // stale errno; the GetInfo error variant is reused for this
                // validation failure — TODO confirm this is intentional.
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            VfioDeviceType::Pci
        } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
            VfioDeviceType::Platform
        } else {
            return Err(VfioError::UnknownDeviceType(dev_info.flags));
        };

        Ok((dev_info, dev_type))
    }
1536
    /// Query interrupt information
    /// return: Vector of interrupts information, each of which contains flags and index
    ///
    /// Issues VFIO_DEVICE_GET_IRQ_INFO once per IRQ index reported by the
    /// device info. Fails if any query fails or reports a count other than 1.
    pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
        let mut irqs: Vec<VfioIrq> = Vec::new();

        for i in 0..self.num_irqs {
            let argsz = mem::size_of::<vfio_irq_info>() as u32;
            let mut irq_info = vfio_irq_info {
                argsz,
                flags: 0,
                index: i,
                count: 0,
            };
            // SAFETY:
            // Safe as we are the owner of dev and irq_info which are valid value,
            // and we verify the return value.
            let ret = unsafe {
                ioctl_with_mut_ref(
                    self.device_file(),
                    VFIO_DEVICE_GET_IRQ_INFO(),
                    &mut irq_info,
                )
            };
            // NOTE(review): indexes with count != 1 are treated as a hard error
            // and reported via the GetInfo error variant — TODO confirm.
            if ret < 0 || irq_info.count != 1 {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            let irq = VfioIrq {
                flags: irq_info.flags,
                index: irq_info.index,
            };
            irqs.push(irq);
        }
        Ok(irqs)
    }
1572
    #[allow(clippy::cast_ptr_alignment)]
    /// Enumerates the device's regions via VFIO_DEVICE_GET_REGION_INFO.
    ///
    /// For each region index: a first fixed-size query discovers `argsz`; if
    /// the kernel reports a larger `argsz`, a second query retrieves the
    /// variable-length capability chain, which is walked to collect sparse
    /// mmap areas and the cap type/subtype.
    ///
    /// NOTE(review): indexes whose first query fails are silently skipped, and
    /// capable regions whose second query lacks VFIO_REGION_INFO_FLAG_CAPS are
    /// dropped too — both shift later regions' positions in the returned Vec
    /// relative to their kernel index. TODO confirm this is intended.
    fn get_regions(dev: &File, num_regions: u32) -> Result<Vec<VfioRegion>> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        for i in 0..num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            let ret =
                // SAFETY:
                // Safe as we are the owner of dev and reg_info which are valid value,
                // and we verify the return value.
                unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO(), &mut reg_info) };
            if ret < 0 {
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            if reg_info.argsz > argsz {
                // The kernel has capability data for this region; re-query with
                // a buffer big enough for the full argsz it asked for.
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // SAFETY:
                // Safe as we are the owner of dev and region_info which are valid value,
                // and we verify the return value.
                let ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO(),
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                // region_with_cap[0].cap_info may contain many structures, like
                // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct.
                // Both of them begin with vfio_info_cap_header, so we will get individual cap from
                // vfio_into_cap_header.
                // Go through all the cap structs.
                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                let mut offset = region_with_cap[0].region_info.cap_offset;
                // Each header's `next` links to the following cap; 0 terminates
                // the chain. Every dereference is bounds-checked against the
                // kernel-reported argsz before it happens.
                while offset != 0 {
                    if offset + cap_header_sz > region_info_sz {
                        break;
                    }
                    // SAFETY:
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    // SAFETY:
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_header = unsafe { &*(cap_ptr as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_sparse_mmap here
                        let sparse_mmap =
                            // SAFETY:
                            // Safe, this vfio_region_info_cap_sparse_mmap is in this function
                            // allocated region_with_cap vec.
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_sparse_mmap) };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        let areas =
                            // SAFETY:
                            // Safe, these vfio_region_sparse_mmap_area are in this function allocated
                            // region_with_cap vec.
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_type here
                        let cap_type_info =
                            // SAFETY:
                            // Safe, this vfio_region_info_cap_type is in this function allocated
                            // region_with_cap vec
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                        // MSI-X mappable regions are treated as fully mmap-able.
                        mmaps.push(vfio_region_sparse_mmap_area {
                            offset: 0,
                            size: region_with_cap[0].region_info.size,
                        });
                    }

                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                // No capability chain: an MMAP-flagged region maps in full.
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
            };
            regions.push(region);
        }

        Ok(regions)
    }
1713
1714 /// get a region's flag
1715 /// the return's value may conatin:
1716 /// VFIO_REGION_INFO_FLAG_READ: region supports read
1717 /// VFIO_REGION_INFO_FLAG_WRITE: region supports write
1718 /// VFIO_REGION_INFO_FLAG_MMAP: region supports mmap
1719 /// VFIO_REGION_INFO_FLAG_CAPS: region's info supports caps
get_region_flags(&self, index: usize) -> u321720 pub fn get_region_flags(&self, index: usize) -> u32 {
1721 match self.regions.get(index) {
1722 Some(v) => v.flags,
1723 None => {
1724 warn!("get_region_flags() with invalid index: {}", index);
1725 0
1726 }
1727 }
1728 }
1729
1730 /// get a region's offset
1731 /// return: Region offset from the start of vfio device descriptor
get_region_offset(&self, index: usize) -> u641732 pub fn get_region_offset(&self, index: usize) -> u64 {
1733 match self.regions.get(index) {
1734 Some(v) => v.offset,
1735 None => {
1736 warn!("get_region_offset with invalid index: {}", index);
1737 0
1738 }
1739 }
1740 }
1741
1742 /// get a region's size
1743 /// return: Region size from the start of vfio device descriptor
get_region_size(&self, index: usize) -> u641744 pub fn get_region_size(&self, index: usize) -> u64 {
1745 match self.regions.get(index) {
1746 Some(v) => v.size,
1747 None => {
1748 warn!("get_region_size with invalid index: {}", index);
1749 0
1750 }
1751 }
1752 }
1753
    /// get a number of regions
    /// return: Number of regions of vfio device descriptor
    pub fn get_region_count(&self) -> usize {
        self.regions.len()
    }
1759
1760 /// get a region's mmap info vector
get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area>1761 pub fn get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area> {
1762 match self.regions.get(index) {
1763 Some(v) => v.mmaps.clone(),
1764 None => {
1765 warn!("get_region_mmap with invalid index: {}", index);
1766 Vec::new()
1767 }
1768 }
1769 }
1770
1771 /// find the specified cap type in device regions
1772 /// Input:
1773 /// type_: cap type
1774 /// sub_type: cap sub_type
1775 /// Output:
1776 /// None: device doesn't have the specified cap type
1777 /// Some((bar_index, region_size)): device has the specified cap type, return region's
1778 /// index and size
get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)>1779 pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
1780 for (index, region) in self.regions.iter().enumerate() {
1781 if let Some(cap_info) = ®ion.cap_info {
1782 if cap_info.0 == type_ && cap_info.1 == sub_type {
1783 return Some((index as u32, region.size));
1784 }
1785 }
1786 }
1787
1788 None
1789 }
1790
1791 /// Returns file offset corresponding to the given `VfioRegionAddr`.
1792 /// The offset can be used when reading/writing the VFIO device's FD directly.
get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64>1793 pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
1794 let region = self
1795 .regions
1796 .get(addr.index)
1797 .ok_or(VfioError::InvalidIndex(addr.index))?;
1798 Ok(region.offset + addr.addr)
1799 }
1800
1801 /// Read region's data from VFIO device into buf
1802 /// index: region num
1803 /// buf: data destination and buf length is read size
1804 /// addr: offset in the region
region_read(&self, index: usize, buf: &mut [u8], addr: u64)1805 pub fn region_read(&self, index: usize, buf: &mut [u8], addr: u64) {
1806 let stub: &VfioRegion = self
1807 .regions
1808 .get(index)
1809 .unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {}", index));
1810
1811 let size = buf.len() as u64;
1812 if size > stub.size || addr + size > stub.size {
1813 panic!(
1814 "tried to read VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
1815 index, addr, size
1816 );
1817 }
1818
1819 self.dev
1820 .read_exact_at(buf, stub.offset + addr)
1821 .unwrap_or_else(|e| {
1822 panic!(
1823 "failed to read region: index={}, addr=0x{:x}, error={}",
1824 index, addr, e
1825 )
1826 });
1827 }
1828
    /// Reads a value from the specified `VfioRegionAddr.addr` + `offset`.
    ///
    /// Delegates to `region_read`, so it panics on an invalid index or
    /// out-of-range address.
    pub fn region_read_from_addr<T: FromBytes>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
        let mut val = mem::MaybeUninit::zeroed();
        let buf =
            // SAFETY:
            // Safe because we have zero-initialized `size_of::<T>()` bytes.
            unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
        self.region_read(addr.index, buf, addr.addr + offset);
        // SAFETY:
        // Safe because any bit pattern is valid for a type that implements FromBytes.
        unsafe { val.assume_init() }
    }
1841
1842 /// write the data from buf into a vfio device region
1843 /// index: region num
1844 /// buf: data src and buf length is write size
1845 /// addr: offset in the region
region_write(&self, index: usize, buf: &[u8], addr: u64)1846 pub fn region_write(&self, index: usize, buf: &[u8], addr: u64) {
1847 let stub: &VfioRegion = self
1848 .regions
1849 .get(index)
1850 .unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {}", index));
1851
1852 let size = buf.len() as u64;
1853 if size > stub.size
1854 || addr + size > stub.size
1855 || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
1856 {
1857 panic!(
1858 "tried to write VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
1859 index, addr, size
1860 );
1861 }
1862
1863 self.dev
1864 .write_all_at(buf, stub.offset + addr)
1865 .unwrap_or_else(|e| {
1866 panic!(
1867 "failed to write region: index={}, addr=0x{:x}, error={}",
1868 index, addr, e
1869 )
1870 });
1871 }
1872
1873 /// Writes data into the specified `VfioRegionAddr.addr` + `offset`.
region_write_to_addr<T: AsBytes>(&self, val: &T, addr: &VfioRegionAddr, offset: u64)1874 pub fn region_write_to_addr<T: AsBytes>(&self, val: &T, addr: &VfioRegionAddr, offset: u64) {
1875 self.region_write(addr.index, val.as_bytes(), addr.addr + offset);
1876 }
1877
1878 /// get vfio device's descriptors which are passed into minijail process
keep_rds(&self) -> Vec<RawDescriptor>1879 pub fn keep_rds(&self) -> Vec<RawDescriptor> {
1880 vec![
1881 self.dev.as_raw_descriptor(),
1882 self.group_descriptor,
1883 self.container.lock().as_raw_descriptor(),
1884 ]
1885 }
1886
1887 /// Add (iova, user_addr) map into vfio container iommu table
1888 /// # Safety
1889 ///
1890 /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
vfio_dma_map( &self, iova: u64, size: u64, user_addr: u64, write_en: bool, ) -> Result<()>1891 pub unsafe fn vfio_dma_map(
1892 &self,
1893 iova: u64,
1894 size: u64,
1895 user_addr: u64,
1896 write_en: bool,
1897 ) -> Result<()> {
1898 self.container
1899 .lock()
1900 .vfio_dma_map(iova, size, user_addr, write_en)
1901 }
1902
1903 /// Remove (iova, user_addr) map from vfio container iommu table
vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()>1904 pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
1905 self.container.lock().vfio_dma_unmap(iova, size)
1906 }
1907
vfio_get_iommu_page_size_mask(&self) -> Result<u64>1908 pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
1909 self.container.lock().vfio_get_iommu_page_size_mask()
1910 }
1911
alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64>1912 pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
1913 self.iova_alloc
1914 .lock()
1915 .allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
1916 .map_err(VfioError::Resources)
1917 }
1918
get_iova(&self, alloc: &Alloc) -> Option<AddressRange>1919 pub fn get_iova(&self, alloc: &Alloc) -> Option<AddressRange> {
1920 self.iova_alloc.lock().get(alloc).map(|res| res.0)
1921 }
1922
release_iova(&self, alloc: Alloc) -> Result<AddressRange>1923 pub fn release_iova(&self, alloc: Alloc) -> Result<AddressRange> {
1924 self.iova_alloc
1925 .lock()
1926 .release(alloc)
1927 .map_err(VfioError::Resources)
1928 }
1929
get_max_addr(&self) -> u641930 pub fn get_max_addr(&self) -> u64 {
1931 self.iova_alloc.lock().get_max_addr()
1932 }
1933
    /// Gets the vfio device backing `File`.
    ///
    /// Borrows the device FD; the caller must not close it.
    pub fn device_file(&self) -> &File {
        &self.dev
    }
1938
1939 /// close vfio device
close(&self)1940 pub fn close(&self) {
1941 self.container.lock().remove_group(self.group_id, true);
1942 }
1943 }
1944
/// Accessor for a VFIO PCI device's configuration space region.
pub struct VfioPciConfig {
    // Underlying VFIO device whose config region is read and written.
    device: Arc<VfioDevice>,
}
1948
1949 impl VfioPciConfig {
new(device: Arc<VfioDevice>) -> Self1950 pub fn new(device: Arc<VfioDevice>) -> Self {
1951 VfioPciConfig { device }
1952 }
1953
read_config<T: FromBytes>(&self, offset: u32) -> T1954 pub fn read_config<T: FromBytes>(&self, offset: u32) -> T {
1955 let mut buf = vec![0u8; std::mem::size_of::<T>()];
1956 self.device.region_read(
1957 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1958 &mut buf,
1959 offset.into(),
1960 );
1961 T::read_from(&buf[..]).expect("failed to convert config data from slice")
1962 }
1963
write_config<T: AsBytes>(&self, config: T, offset: u32)1964 pub fn write_config<T: AsBytes>(&self, config: T, offset: u32) {
1965 self.device.region_write(
1966 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1967 config.as_bytes(),
1968 offset.into(),
1969 );
1970 }
1971
1972 /// Set the VFIO device this config refers to as the bus master.
set_bus_master(&self)1973 pub fn set_bus_master(&self) {
1974 /// Constant definitions from `linux/pci_regs.h`.
1975 const PCI_COMMAND: u32 = 0x4;
1976 /// Enable bus mastering
1977 const PCI_COMMAND_MASTER: u16 = 0x4;
1978
1979 let mut cmd: u16 = self.read_config(PCI_COMMAND);
1980
1981 if cmd & PCI_COMMAND_MASTER != 0 {
1982 return;
1983 }
1984
1985 cmd |= PCI_COMMAND_MASTER;
1986
1987 self.write_config(cmd, PCI_COMMAND);
1988 }
1989 }
1990
impl AsRawDescriptor for VfioDevice {
    // Exposes the underlying VFIO device file's raw descriptor.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.dev.as_raw_descriptor()
    }
}
1996