// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::cell::RefCell;
use std::collections::HashMap;
use std::ffi::CString;
use std::fs::{File, OpenOptions};
use std::io;
use std::mem;
use std::os::raw::c_ulong;
use std::os::unix::prelude::FileExt;
use std::path::{Path, PathBuf};
use std::slice;
use std::sync::Arc;
use std::u32;

use crate::IommuDevType;
use base::error;
use base::{
    ioctl, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val,
    warn, AsRawDescriptor, Error, Event, FromRawDescriptor, RawDescriptor, SafeDescriptor,
};
use data_model::{vec_with_array_field, DataInit};
use hypervisor::{DeviceKind, Vm};
use once_cell::sync::OnceCell;
use remain::sorted;
use resources::address_allocator::AddressAllocator;
use resources::{Alloc, Error as ResourcesError};
use sync::Mutex;
use thiserror::Error;
use vfio_sys::*;

#[sorted]
#[derive(Error, Debug)]
pub enum VfioError {
    #[error("failed to borrow global vfio container")]
    BorrowVfioContainer,
    #[error("failed to duplicate VfioContainer")]
    ContainerDupError,
    #[error("failed to set container's IOMMU driver type as VfioType1V2: {0}")]
    ContainerSetIOMMU(Error),
    #[error("failed to create KVM vfio device: {0}")]
    CreateVfioKvmDevice(Error),
    #[error("failed to get Group Status: {0}")]
    GetGroupStatus(Error),
    #[error("failed to get vfio device fd: {0}")]
    GroupGetDeviceFD(Error),
    #[error("failed to add vfio group into vfio container: {0}")]
    GroupSetContainer(Error),
    #[error("group is not viable")]
    GroupViable,
    #[error("invalid region index: {0}")]
    InvalidIndex(u32),
    #[error("invalid file path")]
    InvalidPath,
    #[error("failed to add guest memory map into iommu table: {0}")]
    IommuDmaMap(Error),
    #[error("failed to remove guest memory map from iommu table: {0}")]
    IommuDmaUnmap(Error),
    #[error("failed to get IOMMU cap info from host")]
    IommuGetCapInfo,
    #[error("failed to get IOMMU info from host: {0}")]
    IommuGetInfo(Error),
    #[error("failed to set KVM vfio device's attribute: {0}")]
    KvmSetDeviceAttr(Error),
    #[error("AddressAllocator is unavailable")]
    NoRescAlloc,
    #[error("failed to open /dev/vfio/vfio container: {0}")]
    OpenContainer(io::Error),
    #[error("failed to open /dev/vfio/$group_num group: {0}")]
    OpenGroup(io::Error),
    #[error("resources error: {0}")]
    Resources(ResourcesError),
    #[error(
        "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
    )]
    VfioApiVersion,
    #[error("failed to get vfio device's info or info doesn't match: {0}")]
    VfioDeviceGetInfo(Error),
    #[error("failed to get vfio device's region info: {0}")]
    VfioDeviceGetRegionInfo(Error),
    #[error("failed to disable vfio device's irq: {0}")]
    VfioIrqDisable(Error),
    #[error("failed to enable vfio device's irq: {0}")]
    VfioIrqEnable(Error),
    #[error("failed to mask vfio device's irq: {0}")]
    VfioIrqMask(Error),
    #[error("failed to unmask vfio device's irq: {0}")]
    VfioIrqUnmask(Error),
    #[error("container doesn't support VfioType1V2 IOMMU driver type")]
    VfioType1V2,
}

type Result<T> = std::result::Result<T, VfioError>;

fn get_error() -> Error {
    Error::last()
}

static KVM_VFIO_FILE: OnceCell<SafeDescriptor> = OnceCell::new();

enum KvmVfioGroupOps {
    Add,
    Delete,
}

#[repr(u32)]
enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
}

/// A VfioContainer contains multiple VfioGroups and delegates an IOMMU domain table.
pub struct VfioContainer {
    container: File,
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
}

fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> T
where
    T: DataInit,
{
    T::from_reader(&bytes[offset..(offset + mem::size_of::<T>())]).expect("malformed kernel data")
}

const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
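    /// Opens /dev/vfio/vfio and verifies the kernel's VFIO API version.
    ///
    /// A minimal usage sketch (not compiled here); the `?` operator assumes a
    /// caller that returns this module's `Result`:
    ///
    /// ```ignore
    /// let container = VfioContainer::new()?;
    /// ```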
    pub fn new() -> Result<Self> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION()) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
        })
    }

    // Constructs a VfioContainer from an existing container file.
    pub fn new_from_container(container: File) -> Result<Self> {
        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION()) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
        })
    }

    fn is_group_set(&self, group_id: u32) -> bool {
        self.groups.get(&group_id).is_some()
    }

    fn check_extension(&self, val: IommuType) -> bool {
        // Safe as file is a vfio container and val is a valid IOMMU type.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION(), val as c_ulong) };
        ret == 1
    }

    fn set_iommu(&self, val: IommuType) -> i32 {
        // Safe as file is a vfio container and val is a valid IOMMU type.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU(), val as c_ulong) }
    }

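    /// Maps `size` bytes of host memory starting at `user_addr` into the
    /// container's IOMMU at I/O virtual address `iova`.
    ///
    /// A usage sketch (not compiled here); `iova`, `len` and `host_addr` are
    /// hypothetical values supplied by the caller:
    ///
    /// ```ignore
    /// // The caller guarantees [host_addr, host_addr + len) stays valid and
    /// // mapped while the mapping exists, which is why this call is unsafe.
    /// unsafe { container.vfio_dma_map(iova, len, host_addr, true)? };
    /// ```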
    ///
    /// # Safety
    ///
    /// The memory described by `user_addr` and `size` must stay valid and
    /// mapped for as long as the IOMMU mapping exists.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA(), &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
            ..Default::default()
        };

        // Safe as file is vfio container, dma_unmap is constructed by us, and
        // we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA(), &mut dma_unmap) };
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

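    /// Returns the bitmask of page sizes supported by the container's IOMMU
    /// (the `iova_pgsizes` field of `vfio_iommu_type1_info`).
    ///
    /// A sketch of checking for 4 KiB support (not compiled here):
    ///
    /// ```ignore
    /// let mask = container.vfio_get_iommu_page_size_mask()?;
    /// let supports_4k = mask & 0x1000 != 0;
    /// ```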
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        let mut iommu_info = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // Safe as file is vfio container, iommu_info has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO(), &mut iommu_info) };
        if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        Ok(iommu_info.iova_pgsizes)
    }

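    /// Queries the valid IOVA ranges supported by the container's IOMMU.
    ///
    /// A sketch of consuming the result (not compiled here):
    ///
    /// ```ignore
    /// for range in container.vfio_iommu_iova_get_iova_ranges()? {
    ///     println!("usable IOVA range: {:#x}..={:#x}", range.start, range.end);
    /// }
    /// ```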
    pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<vfio_iova_range>> {
        // Query the buffer size needed to fetch the capabilities.
        let mut iommu_info_argsz = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // Safe as file is vfio container, iommu_info_argsz has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO(), &mut iommu_info_argsz) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
            iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
        );
        iommu_info[0].argsz = iommu_info_argsz.argsz;
        // Safe as file is vfio container, iommu_info has valid values,
        // and we check the return value
        let ret =
            unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO(), iommu_info.as_mut_ptr()) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        // Safe because we initialized iommu_info with enough space, u8 has less strict
        // alignment, and it will no longer be mutated.
        let info_bytes = unsafe {
            std::slice::from_raw_parts(
                iommu_info.as_ptr() as *const u8,
                iommu_info_argsz.argsz as usize,
            )
        };

        if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Walk the capability chain looking for the IOVA range capability.
        let mut offset = iommu_info[0].cap_offset as usize;
        while offset != 0 {
            let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset);

            if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
                let iova_header = extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
                    info_bytes, offset,
                );
                let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
                let mut ret = Vec::new();
                for i in 0..iova_header.nr_iovas {
                    ret.push(extract_vfio_struct::<vfio_iova_range>(
                        info_bytes,
                        range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
                    ));
                }
                return Ok(ret);
            }
            offset = header.next as usize;
        }

        Err(VfioError::IommuGetCapInfo)
    }

    fn init_vfio_iommu(&mut self) -> Result<()> {
        if !self.check_extension(IommuType::Type1V2) {
            return Err(VfioError::VfioType1V2);
        }

        if self.set_iommu(IommuType::Type1V2) < 0 {
            return Err(VfioError::ContainerSetIOMMU(get_error()));
        }

        Ok(())
    }

    fn get_group_with_vm(
        &mut self,
        id: u32,
        vm: &impl Vm,
        iommu_enabled: bool,
    ) -> Result<Arc<Mutex<VfioGroup>>> {
        match self.groups.get(&id) {
            Some(group) => Ok(group.clone()),
            None => {
                let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
                if self.groups.is_empty() {
                    // Before the first group is added to the container, do the
                    // once-per-container initialization.
                    self.init_vfio_iommu()?;

                    if !iommu_enabled {
                        vm.get_memory().with_regions(
                            |_index, guest_addr, size, host_addr, _mmap, _fd_offset| {
                                // Safe because the guest regions are guaranteed not to overlap
                                unsafe {
                                    self.vfio_dma_map(
                                        guest_addr.0,
                                        size as u64,
                                        host_addr as u64,
                                        true,
                                    )
                                }
                            },
                        )?;
                    }
                }

                let kvm_vfio_file = KVM_VFIO_FILE
                    .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
                    .map_err(VfioError::CreateVfioKvmDevice)?;
                group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;

                self.groups.insert(id, group.clone());

                Ok(group)
            }
        }
    }

    fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
        match self.groups.get(&id) {
            Some(group) => Ok(group.clone()),
            None => {
                let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));

                if self.groups.is_empty() {
                    // Before the first group is added to the container, do the
                    // once-per-container initialization.
                    self.init_vfio_iommu()?;
                }

                self.groups.insert(id, group.clone());
                Ok(group)
            }
        }
    }

    fn remove_group(&mut self, id: u32, reduce: bool) {
        let mut remove = false;

        if let Some(group) = self.groups.get(&id) {
            if reduce {
                group.lock().reduce_device_num();
            }
            if group.lock().device_num() == 0 {
                let kvm_vfio_file = KVM_VFIO_FILE.get().expect("kvm vfio file isn't created");
                if group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
                    .is_err()
                {
                    warn!("failed to remove vfio group from kvm device");
                }
                remove = true;
            }
        }

        if remove {
            self.groups.remove(&id);
        }
    }

    pub fn into_raw_descriptor(&self) -> Result<RawDescriptor> {
        // Safe because self.container is a valid descriptor and we check the
        // return value of dup().
        let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
        if raw_descriptor < 0 {
            Err(VfioError::ContainerDupError)
        } else {
            Ok(raw_descriptor)
        }
    }
}

impl AsRawDescriptor for VfioContainer {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}

struct VfioGroup {
    group: File,
    device_num: u32,
}

impl VfioGroup {
    fn new(container: &VfioContainer, id: u32) -> Result<Self> {
        let group_path = format!("/dev/vfio/{}", id);
        let group_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(Path::new(&group_path))
            .map_err(VfioError::OpenGroup)?;

        let mut group_status = vfio_group_status {
            argsz: mem::size_of::<vfio_group_status>() as u32,
            flags: 0,
        };
        // Safe as we are the owner of group_file and group_status, which are valid values.
        let mut ret =
            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS(), &mut group_status) };
        if ret < 0 {
            return Err(VfioError::GetGroupStatus(get_error()));
        }

        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
            return Err(VfioError::GroupViable);
        }

        let container_raw_descriptor = container.as_raw_descriptor();
        // Safe as we are the owner of group_file and container_raw_descriptor, which are valid
        // values, and we verify the return value.
        ret = unsafe {
            ioctl_with_ref(
                &group_file,
                VFIO_GROUP_SET_CONTAINER(),
                &container_raw_descriptor,
            )
        };
        if ret < 0 {
            return Err(VfioError::GroupSetContainer(get_error()));
        }

        Ok(VfioGroup {
            group: group_file,
            device_num: 0,
        })
    }

    fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path.read_link().map_err(|_| VfioError::InvalidPath)?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        Ok(group_id)
    }

    fn kvm_device_set_group(
        &self,
        kvm_vfio_file: &SafeDescriptor,
        ops: KvmVfioGroupOps,
    ) -> Result<()> {
        let group_descriptor = self.as_raw_descriptor();
        let group_descriptor_ptr = &group_descriptor as *const i32;
        let vfio_dev_attr = match ops {
            KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
                addr: group_descriptor_ptr as u64,
            },
            KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
                addr: group_descriptor_ptr as u64,
            },
        };

        // Safe as we are the owner of kvm_vfio_file and vfio_dev_attr, which are valid values,
        // and we verify the return value.
        if 0 != unsafe {
            ioctl_with_ref(
                kvm_vfio_file,
                kvm_sys::KVM_SET_DEVICE_ATTR(),
                &vfio_dev_attr,
            )
        } {
            return Err(VfioError::KvmSetDeviceAttr(get_error()));
        }

        Ok(())
    }

    fn get_device(&self, name: &str) -> Result<File> {
        let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
        let path_ptr = path.as_ptr();

        // Safe as we are the owner of self and path_ptr, which are valid values.
        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD(), path_ptr) };
        if ret < 0 {
            return Err(VfioError::GroupGetDeviceFD(get_error()));
        }

        // Safe as ret is a valid FD that we own.
        Ok(unsafe { File::from_raw_descriptor(ret) })
    }

    fn add_device_num(&mut self) {
        self.device_num += 1;
    }

    fn reduce_device_num(&mut self) {
        self.device_num -= 1;
    }

    fn device_num(&self) -> u32 {
        self.device_num
    }
}

impl AsRawDescriptor for VfioGroup {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}

/// A helper trait for managing VFIO setup
pub trait VfioCommonTrait: Send + Sync {
    /// The single place to create a VFIO container for a PCI endpoint.
    ///
    /// The policy that determines whether an individual or a shared VFIO
    /// container is created for this device is governed by the physical PCI
    /// topology and the `iommu_dev` argument.
    ///
    ///  # Arguments
    ///
    ///  * `iommu_dev` - the kind of IOMMU (if any) this device attaches to
    ///  * `sysfspath` - the path to the PCI device, e.g. /sys/bus/pci/devices/0000:02:00.0
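    ///
    /// A usage sketch (not compiled here); `NoIommu` requests the shared
    /// container for devices that don't attach to a virtio IOMMU:
    ///
    /// ```ignore
    /// let container = VfioCommonSetup::vfio_get_container(
    ///     IommuDevType::NoIommu,
    ///     None::<&Path>,
    /// )?;
    /// ```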
    fn vfio_get_container<P: AsRef<Path>>(
        iommu_dev: IommuDevType,
        sysfspath: Option<P>,
    ) -> Result<Arc<Mutex<VfioContainer>>>;
}

thread_local! {

    // One VFIO container is shared by all VFIO devices that don't
    // attach to the virtio IOMMU device
    static NO_IOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);

    // For IOMMU enabled devices, all VFIO groups that share the same IOVA space
    // are managed by one VFIO container
    static IOMMU_CONTAINERS: RefCell<Option<Vec<Arc<Mutex<VfioContainer>>>>> = RefCell::new(Some(Default::default()));

    // One VFIO container is shared by all VFIO devices that
    // attach to the CoIOMMU device
    static COIOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);
}

pub struct VfioCommonSetup;

impl VfioCommonTrait for VfioCommonSetup {
    fn vfio_get_container<P: AsRef<Path>>(
        iommu_dev: IommuDevType,
        sysfspath: Option<P>,
    ) -> Result<Arc<Mutex<VfioContainer>>> {
        match iommu_dev {
            IommuDevType::NoIommu => {
                // One VFIO container is used for all IOMMU-disabled groups
                NO_IOMMU_CONTAINER.with(|v| {
                    if v.borrow().is_some() {
                        if let Some(ref container) = *v.borrow() {
                            Ok(container.clone())
                        } else {
                            Err(VfioError::BorrowVfioContainer)
                        }
                    } else {
                        let container = Arc::new(Mutex::new(VfioContainer::new()?));
                        *v.borrow_mut() = Some(container.clone());
                        Ok(container)
                    }
                })
            }
            IommuDevType::VirtioIommu => {
                let path = sysfspath.ok_or(VfioError::InvalidPath)?;
                let group_id = VfioGroup::get_group_id(path)?;

                // One VFIO container is used for all devices belonging to one VFIO group
                IOMMU_CONTAINERS.with(|v| {
                    if let Some(ref mut containers) = *v.borrow_mut() {
                        let container = containers
                            .iter()
                            .find(|container| container.lock().is_group_set(group_id));

                        match container {
                            None => {
                                let container = Arc::new(Mutex::new(VfioContainer::new()?));
                                containers.push(container.clone());
                                Ok(container)
                            }
                            Some(container) => Ok(container.clone()),
                        }
                    } else {
                        Err(VfioError::BorrowVfioContainer)
                    }
                })
            }
            IommuDevType::CoIommu => {
                // One VFIO container is used for all devices attached to the CoIommu device
                COIOMMU_CONTAINER.with(|v| {
                    if v.borrow().is_some() {
                        if let Some(ref container) = *v.borrow() {
                            Ok(container.clone())
                        } else {
                            Err(VfioError::BorrowVfioContainer)
                        }
                    } else {
                        let container = Arc::new(Mutex::new(VfioContainer::new()?));
                        *v.borrow_mut() = Some(container.clone());
                        Ok(container)
                    }
                })
            }
        }
    }
}

/// Vfio Irq type used to enable/disable/mask/unmask vfio irq
pub enum VfioIrqType {
    Intx,
    Msi,
    Msix,
}

/// Vfio Irq information used to assign and enable/disable/mask/unmask vfio irq
pub struct VfioIrq {
    pub flags: u32,
    pub index: u32,
}

/// Address on VFIO memory region.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    /// region number.
    pub index: u32,
    /// offset in the region.
    pub addr: u64,
}

#[derive(Debug)]
pub struct VfioRegion {
    // flags for this region: read/write/mmap
    flags: u32,
    size: u64,
    // region offset used to read/write with vfio device descriptor
    offset: u64,
    // vectors for mmap offset and size
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // type and subtype for cap type
    cap_info: Option<(u32, u32)>,
}

/// Vfio device for exposing regions which could be read/write to kernel vfio device.
pub struct VfioDevice {
    dev: File,
    name: String,
    container: Arc<Mutex<VfioContainer>>,
    group_descriptor: RawDescriptor,
    group_id: u32,
    // vec for vfio device's regions
    regions: Vec<VfioRegion>,

    iova_alloc: Option<Arc<Mutex<AddressAllocator>>>,
}

impl VfioDevice {
    /// Create a new vfio device; guest reads/writes on this device are then
    /// transferred into kernel vfio.
    /// `sysfspath` specifies the vfio device path in the sys filesystem.
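    ///
    /// A creation sketch (not compiled here); the sysfs path and the `vm`
    /// handle are hypothetical:
    ///
    /// ```ignore
    /// let path = "/sys/bus/pci/devices/0000:02:00.0";
    /// let container = VfioCommonSetup::vfio_get_container(IommuDevType::NoIommu, Some(&path))?;
    /// let device = VfioDevice::new_passthrough(&path, &vm, container, false)?;
    /// ```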
    pub fn new_passthrough<P: AsRef<Path>>(
        sysfspath: &P,
        vm: &impl Vm,
        container: Arc<Mutex<VfioContainer>>,
        iommu_enabled: bool,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(&sysfspath)?;

        let group = container
            .lock()
            .get_group_with_vm(group_id, vm, iommu_enabled)?;
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.lock().get_device(&name)?;
        let regions = Self::get_regions(&dev)?;
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        Ok(VfioDevice {
            dev,
            name,
            container,
            group_descriptor,
            group_id,
            regions,
            iova_alloc: None,
        })
    }

    pub fn new<P: AsRef<Path>>(
        sysfspath: &P,
        container: Arc<Mutex<VfioContainer>>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(&sysfspath)?;
        let group = container.lock().get_group(group_id)?;
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);

        let dev = match group.lock().get_device(&name) {
            Ok(dev) => dev,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let regions = match Self::get_regions(&dev) {
            Ok(regions) => regions,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        let iova_ranges = container
            .lock()
            .vfio_iommu_iova_get_iova_ranges()?
            .into_iter()
            .map(|r| std::ops::RangeInclusive::new(r.start, r.end));
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        Ok(VfioDevice {
            dev,
            name,
            container,
            group_descriptor,
            group_id,
            regions,
            iova_alloc: Some(Arc::new(Mutex::new(iova_alloc))),
        })
    }

    /// Returns the file for this device.
    pub fn dev_file(&self) -> &File {
        &self.dev
    }

    /// Returns PCI device name, formatted as BUS:DEVICE.FUNCTION string.
    pub fn device_name(&self) -> &String {
        &self.name
    }

    /// Enable vfio device's irq and associate Irqfd Events with the device.
    /// When MSI-X is enabled, multiple vectors are supported: the vectors from
    /// subindex to subindex + descriptors.len() are assigned the irqfds in the
    /// descriptors array.
    /// When index = VFIO_PCI_REQ_IRQ_INDEX, kernel vfio triggers this event
    /// when the physical device is removed.
    /// If a descriptor is None, -1 is assigned to the irq. A value of -1 is
    /// used to either de-assign interrupts if already assigned or skip
    /// un-assigned interrupts.
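    ///
    /// For example, wiring a single MSI vector to an event (a sketch;
    /// `msi_event` is a hypothetical Event):
    ///
    /// ```ignore
    /// device.irq_enable(&[Some(&msi_event)], VFIO_PCI_MSI_IRQ_INDEX, 0)?;
    /// ```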
    pub fn irq_enable(
        &self,
        descriptors: &[Option<&Event>],
        index: u32,
        subindex: u32,
    ) -> Result<()> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = subindex;
        irq_set[0].count = count as u32;

        // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
        // is u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
        // together as u32. It is safe as enough space is reserved through
        // vec_with_array_field(u32)<count>.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        for descriptor in descriptors.iter().take(count) {
            let (left, right) = data.split_at_mut(u32_size);
            match descriptor {
                Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
                None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
            }
            data = right;
        }

        // Safe as we are the owner of self and irq_set which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }

    /// When INTx is enabled, the irqfd is used to trigger a level interrupt
    /// into the guest, and the resample irqfd is used to get the guest's EOI
    /// notification.
    /// When the host hardware generates an interrupt, the vfio irq handler in
    /// the host kernel receives and handles it: the handler disables the hw
    /// irq first, then triggers the irqfd to inject the interrupt into the
    /// guest. When the resample irqfd is triggered by the guest's EOI, the
    /// vfio kernel driver re-enables the hw irq, so the hardware can generate
    /// further interrupts.
    /// This function enables the resample irqfd so the vfio kernel driver can
    /// get EOI notifications.
    ///
    /// descriptor: should be the resample IrqFd.
    pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        {
            // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data is
            // u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
            // together as u32. It is safe as enough space is reserved through
            // vec_with_array_field(u32)<1>.
            let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
            descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
        }

        // Safe as we are the owner of self and irq_set which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }

    /// Disable vfio device's irq and disconnect Irqfd Events from the device.
    pub fn irq_disable(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 0;

        // Safe as we are the owner of self and irq_set which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqDisable(get_error()))
        } else {
            Ok(())
        }
    }

    /// Unmask vfio device irq
    pub fn irq_unmask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // Safe as we are the owner of self and irq_set which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqUnmask(get_error()))
        } else {
            Ok(())
        }
    }

    /// Mask vfio device irq
    pub fn irq_mask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // Safe as we are the owner of self and irq_set which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqMask(get_error()))
        } else {
            Ok(())
        }
    }

    fn validate_dev_info(dev_info: &mut vfio_device_info) -> Result<()> {
        if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
            if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
                || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
            {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }
            return Ok(());
        } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
            return Ok(());
        }

        Err(VfioError::VfioDeviceGetInfo(get_error()))
    }

    /// Get and validate VFIO device information.
    pub fn check_device_info(&self) -> Result<vfio_device_info> {
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };

        // Safe as we are the owner of device_file and dev_info, which are valid values,
        // and we verify the return value.
        let ret = unsafe {
            ioctl_with_mut_ref(self.device_file(), VFIO_DEVICE_GET_INFO(), &mut dev_info)
        };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        Self::validate_dev_info(&mut dev_info)?;
        Ok(dev_info)
    }

    /// Query interrupt information
    /// return: Vector of interrupt information, each entry of which contains flags and index
    pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
        let dev_info = self.check_device_info()?;
        let mut irqs: Vec<VfioIrq> = Vec::new();

        for i in 0..dev_info.num_irqs {
            let argsz = mem::size_of::<vfio_irq_info>() as u32;
            let mut irq_info = vfio_irq_info {
                argsz,
                flags: 0,
                index: i,
                count: 0,
            };
            // Safe as we are the owner of the device file and irq_info, which are valid values,
            // and we verify the return value.
            let ret = unsafe {
                ioctl_with_mut_ref(
                    self.device_file(),
                    VFIO_DEVICE_GET_IRQ_INFO(),
                    &mut irq_info,
                )
            };
            if ret < 0 || irq_info.count != 1 {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            let irq = VfioIrq {
                flags: irq_info.flags,
                index: irq_info.index,
            };
            irqs.push(irq);
        }
        Ok(irqs)
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn get_regions(dev: &File) -> Result<Vec<VfioRegion>> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };
        // Safe as we are the owner of dev and dev_info, which are valid values,
        // and we verify the return value.
        let mut ret = unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_INFO(), &mut dev_info) };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        Self::validate_dev_info(&mut dev_info)?;
        for i in 0..dev_info.num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            // Safe as we are the owner of dev and reg_info, which are valid values,
            // and we verify the return value.
            ret = unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO(), &mut reg_info) };
            if ret < 0 {
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            if reg_info.argsz > argsz {
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // Safe as we are the owner of dev and region_with_cap, which are valid values,
                // and we verify the return value.
                ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO(),
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                // region_with_cap[0].cap_info may contain many structures, like
                // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct.
                // Both of them begin with vfio_info_cap_header, so we get each individual cap
                // from its vfio_info_cap_header.
                // Go through all the cap structs.
                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                let mut offset = region_with_cap[0].region_info.cap_offset;
                while offset != 0 {
                    if offset + cap_header_sz > region_info_sz {
                        break;
                    }
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    let cap_header =
                        unsafe { &*(cap_ptr as *mut u8 as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_sparse_mmap here
                        // Safe, this vfio_region_info_cap_sparse_mmap is in this function allocated
                        // region_with_cap vec.
                        let sparse_mmap = unsafe {
                            &*(cap_ptr as *mut u8 as *const vfio_region_info_cap_sparse_mmap)
                        };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        // Safe, these vfio_region_sparse_mmap_area are in this function allocated
                        // region_with_cap vec.
                        let areas =
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_type here
                        // Safe, this vfio_region_info_cap_type is in this function allocated
                        // region_with_cap vec
                        let cap_type_info =
                            unsafe { &*(cap_ptr as *mut u8 as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                        mmaps.push(vfio_region_sparse_mmap_area {
                            offset: region_with_cap[0].region_info.offset,
                            size: region_with_cap[0].region_info.size,
                        });
                    }

                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
            };
            regions.push(region);
        }

        Ok(regions)
    }

    /// get a region's flags
    /// the returned value may contain:
    ///     VFIO_REGION_INFO_FLAG_READ:  region supports read
    ///     VFIO_REGION_INFO_FLAG_WRITE: region supports write
    ///     VFIO_REGION_INFO_FLAG_MMAP:  region supports mmap
    ///     VFIO_REGION_INFO_FLAG_CAPS:  region's info supports caps
    pub fn get_region_flags(&self, index: u32) -> u32 {
        match self.regions.get(index as usize) {
            Some(v) => v.flags,
            None => {
                warn!("get_region_flags() with invalid index: {}", index);
                0
            }
        }
    }

    /// get a region's offset
    /// return: Region offset from the start of vfio device descriptor
    pub fn get_region_offset(&self, index: u32) -> u64 {
        match self.regions.get(index as usize) {
            Some(v) => v.offset,
            None => {
                warn!("get_region_offset with invalid index: {}", index);
                0
            }
        }
    }

    /// get a region's size
    /// return: Region size from the start of vfio device descriptor
    pub fn get_region_size(&self, index: u32) -> u64 {
        match self.regions.get(index as usize) {
            Some(v) => v.size,
            None => {
                warn!("get_region_size with invalid index: {}", index);
                0
            }
        }
    }

    /// get the number of regions
    /// return: Number of regions of vfio device descriptor
    pub fn get_region_count(&self) -> u32 {
        self.regions.len() as u32
    }

    /// get a region's mmap info vector
    pub fn get_region_mmap(&self, index: u32) -> Vec<vfio_region_sparse_mmap_area> {
        match self.regions.get(index as usize) {
            Some(v) => v.mmaps.clone(),
            None => {
                warn!("get_region_mmap with invalid index: {}", index);
                Vec::new()
            }
        }
    }

    /// find the specified cap type in device regions
    /// Input:
    ///      type_:  cap type
    ///      sub_type: cap sub_type
    /// Output:
    ///     None: device doesn't have the specified cap type
    ///     Some((bar_index, region_size)): device has the specified cap type; returns the
    ///                                     region's index and size
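    ///
    /// For example, probing for a capability (a sketch; `cap_type` and
    /// `cap_subtype` are hypothetical values):
    ///
    /// ```ignore
    /// if let Some((index, size)) = device.get_cap_type_info(cap_type, cap_subtype) {
    ///     println!("cap region {} has size {:#x}", index, size);
    /// }
    /// ```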
    pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
        for (index, region) in self.regions.iter().enumerate() {
            if let Some(cap_info) = &region.cap_info {
                if cap_info.0 == type_ && cap_info.1 == sub_type {
                    return Some((index as u32, region.size));
                }
            }
        }

        None
    }

    /// Returns file offset corresponding to the given `VfioRegionAddr`.
    /// The offset can be used when reading/writing the VFIO device's FD directly.
    pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
        let region = self
            .regions
            .get(addr.index as usize)
            .ok_or(VfioError::InvalidIndex(addr.index))?;
        Ok(region.offset + addr.addr)
    }

    /// Read region's data from VFIO device into buf
    /// index: region num
    /// buf: data destination; the buf length is the read size
    /// addr: offset in the region
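    ///
    /// For example, reading the first 4 bytes of PCI config space (a sketch,
    /// not compiled here):
    ///
    /// ```ignore
    /// let mut buf = [0u8; 4];
    /// device.region_read(VFIO_PCI_CONFIG_REGION_INDEX, &mut buf, 0);
    /// ```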
    pub fn region_read(&self, index: u32, buf: &mut [u8], addr: u64) {
        let stub: &VfioRegion = self
            .regions
            .get(index as usize)
            .unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {}", index));

        let size = buf.len() as u64;
        if size > stub.size || addr + size > stub.size {
            panic!(
                "tried to read VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
                index, addr, size
            );
        }

        self.dev
            .read_exact_at(buf, stub.offset + addr)
            .unwrap_or_else(|e| {
                panic!(
                    "failed to read region: index={}, addr=0x{:x}, error={}",
                    index, addr, e
                )
            });
    }

    /// Reads a value from the specified `VfioRegionAddr.addr` + `offset`.
    pub fn region_read_from_addr<T: DataInit>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
        let mut val = mem::MaybeUninit::zeroed();
        // Safe because we have zero-initialized `size_of::<T>()` bytes.
        let buf =
            unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
        self.region_read(addr.index, buf, addr.addr + offset);
        // Safe because any bit pattern is valid for a type that implements
        // DataInit.
        unsafe { val.assume_init() }
    }

    /// write the data from buf into a vfio device region
    /// index: region num
    /// buf: data src; the buf length is the write size
    /// addr: offset in the region
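    ///
    /// For example, writing 4 bytes at the start of a BAR (a sketch, not
    /// compiled here):
    ///
    /// ```ignore
    /// let buf = [0u8; 4];
    /// device.region_write(VFIO_PCI_BAR0_REGION_INDEX, &buf, 0);
    /// ```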
    pub fn region_write(&self, index: u32, buf: &[u8], addr: u64) {
        let stub: &VfioRegion = self
            .regions
            .get(index as usize)
            .unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {}", index));

        let size = buf.len() as u64;
        if size > stub.size
            || addr + size > stub.size
            || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
        {
            panic!(
                "tried to write VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
                index, addr, size
            );
        }

        self.dev
            .write_all_at(buf, stub.offset + addr)
            .unwrap_or_else(|e| {
                panic!(
                    "failed to write region: index={}, addr=0x{:x}, error={}",
                    index, addr, e
                )
            });
    }

    /// Writes data into the specified `VfioRegionAddr.addr` + `offset`.
    pub fn region_write_to_addr<T: DataInit>(&self, val: &T, addr: &VfioRegionAddr, offset: u64) {
        self.region_write(addr.index, val.as_slice(), addr.addr + offset);
    }

    /// get vfio device's descriptors which are passed into the minijail process
    pub fn keep_rds(&self) -> Vec<RawDescriptor> {
        vec![
            self.dev.as_raw_descriptor(),
            self.group_descriptor,
            self.container.lock().as_raw_descriptor(),
        ]
    }

    /// Add (iova, user_addr) map into vfio container iommu table
    ///
    /// # Safety
    ///
    /// Same requirements as [`VfioContainer::vfio_dma_map`]: the memory backing
    /// `user_addr` must stay valid and mapped while the mapping exists.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        self.container
            .lock()
            .vfio_dma_map(iova, size, user_addr, write_en)
    }

    /// Remove (iova, user_addr) map from vfio container iommu table
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        self.container.lock().vfio_dma_unmap(iova, size)
    }

    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        self.container.lock().vfio_get_iommu_page_size_mask()
    }

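    /// Allocates an IOVA range of `size` bytes aligned to `align_size` from
    /// the device's IOVA allocator (only available for devices created with
    /// `VfioDevice::new`).
    ///
    /// A sketch of pairing it with a DMA map (not compiled here; `alloc` is a
    /// hypothetical resources::Alloc tag and `host_addr` a valid mapping):
    ///
    /// ```ignore
    /// let iova = device.alloc_iova(0x1000, 0x1000, alloc)?;
    /// unsafe { device.vfio_dma_map(iova, 0x1000, host_addr, true)? };
    /// ```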
    pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
        match &self.iova_alloc {
            None => Err(VfioError::NoRescAlloc),
            Some(iova_alloc) => iova_alloc
                .lock()
                .allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
                .map_err(VfioError::Resources),
        }
    }

    /// Gets the vfio device backing `File`.
    pub fn device_file(&self) -> &File {
        &self.dev
    }

    /// close vfio device
    pub fn close(&self) {
        self.container.lock().remove_group(self.group_id, true);
    }
}

pub struct VfioPciConfig {
    device: Arc<VfioDevice>,
}

impl VfioPciConfig {
    pub fn new(device: Arc<VfioDevice>) -> Self {
        VfioPciConfig { device }
    }

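    /// Reads a `T` from the device's PCI configuration space at `offset`.
    ///
    /// For example, reading the 16-bit vendor ID at offset 0 (a sketch, not
    /// compiled here):
    ///
    /// ```ignore
    /// let config = VfioPciConfig::new(device);
    /// let vendor_id: u16 = config.read_config(0);
    /// ```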
    pub fn read_config<T: DataInit>(&self, offset: u32) -> T {
        let mut buf = vec![0u8; std::mem::size_of::<T>()];
        self.device
            .region_read(VFIO_PCI_CONFIG_REGION_INDEX, &mut buf, offset.into());
        T::from_slice(&buf)
            .copied()
            .expect("failed to convert config data from slice")
    }

    pub fn write_config<T: DataInit>(&self, config: T, offset: u32) {
        self.device.region_write(
            VFIO_PCI_CONFIG_REGION_INDEX,
            config.as_slice(),
            offset.into(),
        );
    }

    /// Set the VFIO device this config refers to as the bus master.
    pub fn set_bus_master(&self) {
        /// Constant definitions from `linux/pci_regs.h`.
        const PCI_COMMAND: u32 = 0x4;
        /// Enable bus mastering
        const PCI_COMMAND_MASTER: u16 = 0x4;

        let mut cmd: u16 = self.read_config(PCI_COMMAND);

        if cmd & PCI_COMMAND_MASTER != 0 {
            return;
        }

        cmd |= PCI_COMMAND_MASTER;

        self.write_config(cmd, PCI_COMMAND);
    }
}

impl AsRawDescriptor for VfioDevice {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.dev.as_raw_descriptor()
    }
}