• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use data_model::vec_with_array_field;
6 use std::collections::HashMap;
7 use std::ffi::CString;
8 use std::fmt;
9 use std::fs::{File, OpenOptions};
10 use std::io;
11 use std::mem;
12 use std::os::unix::prelude::FileExt;
13 use std::path::{Path, PathBuf};
14 use std::sync::Arc;
15 use std::u32;
16 use sync::Mutex;
17 
18 use base::{
19     ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, warn,
20     AsRawDescriptor, Error, Event, FromRawDescriptor, RawDescriptor, SafeDescriptor,
21 };
22 use hypervisor::{DeviceKind, Vm};
23 use vm_memory::GuestMemory;
24 
25 use vfio_sys::*;
26 
/// Errors that can occur while setting up or operating a VFIO container,
/// group, or device. Variants wrapping `io::Error` come from file-open
/// failures; variants wrapping `base::Error` come from failed ioctls.
#[derive(Debug)]
pub enum VfioError {
    // Failed to open /dev/vfio/vfio.
    OpenContainer(io::Error),
    // Failed to open /dev/vfio/<group_num>.
    OpenGroup(io::Error),
    // VFIO_GROUP_GET_STATUS ioctl failed.
    GetGroupStatus(Error),
    // Group status did not report VFIO_GROUP_FLAGS_VIABLE.
    GroupViable,
    // Kernel VFIO API version does not match what this crate was built against.
    VfioApiVersion,
    // Container does not support the Type1v2 IOMMU backend.
    VfioType1V2,
    // VFIO_GROUP_SET_CONTAINER ioctl failed.
    GroupSetContainer(Error),
    // VFIO_SET_IOMMU ioctl failed.
    ContainerSetIOMMU(Error),
    // VFIO_GROUP_GET_DEVICE_FD ioctl failed.
    GroupGetDeviceFD(Error),
    // KVM refused to create the VFIO pseudo device.
    CreateVfioKvmDevice(Error),
    // KVM_SET_DEVICE_ATTR ioctl failed.
    KvmSetDeviceAttr(Error),
    // VFIO_DEVICE_GET_INFO failed or returned inconsistent info.
    VfioDeviceGetInfo(Error),
    // VFIO_DEVICE_GET_REGION_INFO ioctl failed.
    VfioDeviceGetRegionInfo(Error),
    // A sysfs path could not be resolved to a group id / device name.
    InvalidPath,
    // VFIO_IOMMU_MAP_DMA ioctl failed.
    IommuDmaMap(Error),
    // VFIO_IOMMU_UNMAP_DMA ioctl failed or unmapped a different size.
    IommuDmaUnmap(Error),
    // VFIO_DEVICE_SET_IRQS enable request failed.
    VfioIrqEnable(Error),
    // VFIO_DEVICE_SET_IRQS disable request failed.
    VfioIrqDisable(Error),
    // VFIO_DEVICE_SET_IRQS unmask request failed.
    VfioIrqUnmask(Error),
    // VFIO_DEVICE_SET_IRQS mask request failed.
    VfioIrqMask(Error),
}
50 
51 impl fmt::Display for VfioError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result52     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53         match self {
54             VfioError::OpenContainer(e) => write!(f, "failed to open /dev/vfio/vfio container: {}", e),
55             VfioError::OpenGroup(e) => write!(f, "failed to open /dev/vfio/$group_num group: {}", e),
56             VfioError::GetGroupStatus(e) => write!(f, "failed to get Group Status: {}", e),
57             VfioError::GroupViable => write!(f, "group is inviable"),
58             VfioError::VfioApiVersion => write!(f, "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/srv/vfio.rs"),
59             VfioError::VfioType1V2 => write!(f, "container dones't support VfioType1V2 IOMMU driver type"),
60             VfioError::GroupSetContainer(e) => write!(f, "failed to add vfio group into vfio container: {}", e),
61             VfioError::ContainerSetIOMMU(e) => write!(f, "failed to set container's IOMMU driver type as VfioType1V2: {}", e),
62             VfioError::GroupGetDeviceFD(e) => write!(f, "failed to get vfio device fd: {}", e),
63             VfioError::CreateVfioKvmDevice(e) => write!(f, "failed to create KVM vfio device: {}", e),
64             VfioError::KvmSetDeviceAttr(e) => write!(f, "failed to set KVM vfio device's attribute: {}", e),
65             VfioError::VfioDeviceGetInfo(e) => write!(f, "failed to get vfio device's info or info doesn't match: {}", e),
66             VfioError::VfioDeviceGetRegionInfo(e) => write!(f, "failed to get vfio device's region info: {}", e),
67             VfioError::InvalidPath => write!(f,"invalid file path"),
68             VfioError::IommuDmaMap(e) => write!(f, "failed to add guest memory map into iommu table: {}", e),
69             VfioError::IommuDmaUnmap(e) => write!(f, "failed to remove guest memory map from iommu table: {}", e),
70             VfioError::VfioIrqEnable(e) => write!(f, "failed to enable vfio deviece's irq: {}", e),
71             VfioError::VfioIrqDisable(e) => write!(f, "failed to disable vfio deviece's irq: {}", e),
72             VfioError::VfioIrqUnmask(e) => write!(f, "failed to unmask vfio deviece's irq: {}", e),
73             VfioError::VfioIrqMask(e) => write!(f, "failed to mask vfio deviece's irq: {}", e),
74         }
75     }
76 }
77 
/// Snapshot the most recent OS error (errno) as a `base::Error`, for wrapping
/// into a `VfioError` right after a failed ioctl.
fn get_error() -> Error {
    Error::last()
}
81 
/// VfioContainer contains multiple VfioGroups and owns a shared IOMMU domain
/// (DMA mapping table) that all member groups use.
pub struct VfioContainer {
    // Open handle to /dev/vfio/vfio.
    container: File,
    // KVM VFIO pseudo-device descriptor; populated on first group attach (see
    // `init`), `None` until then.
    kvm_vfio_dev: Option<SafeDescriptor>,
    // Groups attached to this container, keyed by group number.
    groups: HashMap<u32, Arc<VfioGroup>>,
}
88 
// Expected kernel VFIO API version (VFIO_GET_API_VERSION must return this).
const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
    /// Open VfioContainer by opening /dev/vfio/vfio and verifying the kernel's
    /// VFIO API version matches `VFIO_API_VERSION`.
    pub fn new() -> Result<Self, VfioError> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION()) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            kvm_vfio_dev: None,
            groups: HashMap::new(),
        })
    }

    // Returns true when the kernel reports support for the given IOMMU type.
    // Only Type1/Type1v2 values are accepted; any other value is a programming
    // error in this crate, hence the panic.
    fn check_extension(&self, val: u32) -> bool {
        if val != VFIO_TYPE1_IOMMU && val != VFIO_TYPE1v2_IOMMU {
            panic!("IOMMU type error");
        }

        // Safe as file is vfio container and make sure val is valid.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION(), val.into()) };
        ret == 1
    }

    // Selects the container's IOMMU backend via VFIO_SET_IOMMU.
    // Returns the raw ioctl result (negative on failure).
    fn set_iommu(&self, val: u32) -> i32 {
        if val != VFIO_TYPE1_IOMMU && val != VFIO_TYPE1v2_IOMMU {
            panic!("IOMMU type error");
        }

        // Safe as file is vfio container and make sure val is valid.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU(), val.into()) }
    }

    // Maps [user_addr, user_addr + size) into the IOMMU at guest address
    // `iova`, readable and writable by the device.
    //
    // Unsafe: the caller must guarantee that `user_addr..user_addr + size` is
    // valid process memory that stays alive (and does not overlap an existing
    // mapping) for as long as the mapping is installed.
    unsafe fn vfio_dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<(), VfioError> {
        let dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
            vaddr: user_addr,
            iova,
            size,
        };

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA(), &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    // Removes the IOMMU mapping at [iova, iova + size). The kernel reports the
    // actually-unmapped size back in `dma_unmap.size`; anything other than the
    // requested size is treated as failure.
    fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioError> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
        };

        // Safe as file is vfio container, dma_unmap is constructed by us, and
        // we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA(), &mut dma_unmap) };
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

    // One-time container setup: pick the Type1v2 IOMMU backend, mirror all
    // guest memory regions into the IOMMU table, and create the KVM VFIO
    // pseudo device. Called from `get_group` when the first group is added.
    fn init(&mut self, vm: &impl Vm, guest_mem: &GuestMemory) -> Result<(), VfioError> {
        if !self.check_extension(VFIO_TYPE1v2_IOMMU) {
            return Err(VfioError::VfioType1V2);
        }

        if self.set_iommu(VFIO_TYPE1v2_IOMMU) < 0 {
            return Err(VfioError::ContainerSetIOMMU(get_error()));
        }

        // Add all guest memory regions into vfio container's iommu table,
        // then vfio kernel driver could access guest memory from gfn
        guest_mem.with_regions(|_index, guest_addr, size, host_addr, _mmap, _fd_offset| {
            // Safe because the guest regions are guaranteed not to overlap
            unsafe { self.vfio_dma_map(guest_addr.0, size as u64, host_addr as u64) }
        })?;

        let vfio_descriptor = vm
            .create_device(DeviceKind::Vfio)
            .map_err(VfioError::CreateVfioKvmDevice)?;
        self.kvm_vfio_dev = Some(vfio_descriptor);

        Ok(())
    }

    // Returns the (cached or newly opened) group for `id`, attaching it to
    // this container and registering it with the KVM VFIO device on first use.
    fn get_group(
        &mut self,
        id: u32,
        vm: &impl Vm,
        guest_mem: &GuestMemory,
    ) -> Result<Arc<VfioGroup>, VfioError> {
        match self.groups.get(&id) {
            Some(group) => Ok(group.clone()),
            None => {
                let group = Arc::new(VfioGroup::new(self, id)?);

                if self.groups.is_empty() {
                    // Before the first group is added into container, do
                    // one-time container initialization for this vm
                    self.init(vm, guest_mem)?;
                }

                // `init` ran either now or on a previous group add, so the KVM
                // VFIO device must exist at this point.
                let kvm_vfio_file = self
                    .kvm_vfio_dev
                    .as_ref()
                    .expect("kvm vfio device should exist");
                group.kvm_device_add_group(kvm_vfio_file)?;

                self.groups.insert(id, group.clone());

                Ok(group)
            }
        }
    }
}
220 
impl AsRawDescriptor for VfioContainer {
    // Expose the underlying /dev/vfio/vfio fd so the container can be passed
    // straight to the ioctl helpers.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}
226 
// A VFIO group: an open handle to /dev/vfio/<group_num>, already attached to a
// container by `VfioGroup::new`.
struct VfioGroup {
    group: File,
}
230 
231 impl VfioGroup {
new(container: &VfioContainer, id: u32) -> Result<Self, VfioError>232     fn new(container: &VfioContainer, id: u32) -> Result<Self, VfioError> {
233         let mut group_path = String::from("/dev/vfio/");
234         let s_id = &id;
235         group_path.push_str(s_id.to_string().as_str());
236 
237         let group_file = OpenOptions::new()
238             .read(true)
239             .write(true)
240             .open(Path::new(&group_path))
241             .map_err(VfioError::OpenGroup)?;
242 
243         let mut group_status = vfio_group_status {
244             argsz: mem::size_of::<vfio_group_status>() as u32,
245             flags: 0,
246         };
247         // Safe as we are the owner of group_file and group_status which are valid value.
248         let mut ret =
249             unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS(), &mut group_status) };
250         if ret < 0 {
251             return Err(VfioError::GetGroupStatus(get_error()));
252         }
253 
254         if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
255             return Err(VfioError::GroupViable);
256         }
257 
258         // Safe as we are the owner of group_file and container_raw_descriptor which are valid value,
259         // and we verify the ret value
260         let container_raw_descriptor = container.as_raw_descriptor();
261         ret = unsafe {
262             ioctl_with_ref(
263                 &group_file,
264                 VFIO_GROUP_SET_CONTAINER(),
265                 &container_raw_descriptor,
266             )
267         };
268         if ret < 0 {
269             return Err(VfioError::GroupSetContainer(get_error()));
270         }
271 
272         Ok(VfioGroup { group: group_file })
273     }
274 
kvm_device_add_group(&self, kvm_vfio_file: &SafeDescriptor) -> Result<(), VfioError>275     fn kvm_device_add_group(&self, kvm_vfio_file: &SafeDescriptor) -> Result<(), VfioError> {
276         let group_descriptor = self.as_raw_descriptor();
277         let group_descriptor_ptr = &group_descriptor as *const i32;
278         let vfio_dev_attr = kvm_sys::kvm_device_attr {
279             flags: 0,
280             group: kvm_sys::KVM_DEV_VFIO_GROUP,
281             attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
282             addr: group_descriptor_ptr as u64,
283         };
284 
285         // Safe as we are the owner of vfio_dev_fd and vfio_dev_attr which are valid value,
286         // and we verify the return value.
287         if 0 != unsafe {
288             ioctl_with_ref(
289                 kvm_vfio_file,
290                 kvm_sys::KVM_SET_DEVICE_ATTR(),
291                 &vfio_dev_attr,
292             )
293         } {
294             return Err(VfioError::KvmSetDeviceAttr(get_error()));
295         }
296 
297         Ok(())
298     }
299 
get_device(&self, name: &str) -> Result<File, VfioError>300     fn get_device(&self, name: &str) -> Result<File, VfioError> {
301         let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
302         let path_ptr = path.as_ptr();
303 
304         // Safe as we are the owner of self and path_ptr which are valid value.
305         let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD(), path_ptr) };
306         if ret < 0 {
307             return Err(VfioError::GroupGetDeviceFD(get_error()));
308         }
309 
310         // Safe as ret is valid FD
311         Ok(unsafe { File::from_raw_descriptor(ret) })
312     }
313 }
314 
impl AsRawDescriptor for VfioGroup {
    // Expose the group fd for the ioctl helpers and for KVM registration.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}
320 
/// Vfio Irq type used to enable/disable/mask/unmask vfio irq
pub enum VfioIrqType {
    // Legacy level-triggered INTx interrupt.
    Intx,
    // Message Signaled Interrupts.
    Msi,
    // Extended Message Signaled Interrupts (per-vector control).
    Msix,
}
327 
// Metadata for one VFIO device region, captured once by `get_regions`.
struct VfioRegion {
    // flags for this region: read/write/mmap
    flags: u32,
    // Total region size in bytes.
    size: u64,
    // region offset used to read/write with vfio device descriptor
    offset: u64,
    // vectors for mmap offset and size
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // type and subtype for cap type
    cap_info: Option<(u32, u32)>,
}
339 
/// Vfio device for exposing regions which could be read/write to kernel vfio device.
pub struct VfioDevice {
    // Device fd obtained via VFIO_GROUP_GET_DEVICE_FD.
    dev: File,
    // Device name (last component of its sysfs path, e.g. a PCI BDF).
    name: String,
    // Container shared with other VFIO devices in the same VM.
    container: Arc<Mutex<VfioContainer>>,
    // Raw fd of the owning group (the group itself lives in the container).
    group_descriptor: RawDescriptor,
    // vec for vfio device's regions
    regions: Vec<VfioRegion>,
}
349 
350 impl VfioDevice {
    /// Create a new vfio device, then guest read/write on this device could be
    /// transferred into kernel vfio.
    /// sysfspath specify the vfio device path in sys file system.
    pub fn new(
        sysfspath: &Path,
        vm: &impl Vm,
        guest_mem: &GuestMemory,
        container: Arc<Mutex<VfioContainer>>,
    ) -> Result<Self, VfioError> {
        // The group number is the final component of the iommu_group symlink
        // under the device's sysfs directory.
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path.read_link().map_err(|_| VfioError::InvalidPath)?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        // Opens/attaches the group (and initializes the container on first use).
        let group = container.lock().get_group(group_id, vm, guest_mem)?;
        // Device name = last component of sysfspath.
        let name_osstr = sysfspath.file_name().ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.get_device(&name)?;
        // Snapshot all region metadata up front; later accessors only consult
        // this cache.
        let regions = Self::get_regions(&dev)?;

        Ok(VfioDevice {
            dev,
            name,
            container,
            group_descriptor: group.as_raw_descriptor(),
            regions,
        })
    }
385 
    /// Returns PCI device name, formatted as BUS:DEVICE.FUNCTION string.
    // NOTE(review): returns &String (not &str) to keep the existing public API.
    pub fn device_name(&self) -> &String {
        &self.name
    }
390 
    /// Enable vfio device's irq and associate Irqfd Event with device.
    /// When MSIx is enabled, multi vectors will be supported, so descriptors is vector and the vector
    /// length is the num of MSIx vectors
    pub fn irq_enable(&self, descriptors: Vec<&Event>, index: u32) -> Result<(), VfioError> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        // vfio_irq_set has a flexible array member, so allocate header + the
        // u32 eventfd payload in one buffer; irq_set[0] is the header.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = count as u32;

        // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
        // is u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
        // together as u32. It is safe as enough space is reserved through
        // vec_with_array_field(u32)<count>.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        for descriptor in descriptors.iter().take(count) {
            // Write each eventfd as 4 native-endian bytes, advancing `data`
            // one u32-sized window at a time.
            let (left, right) = data.split_at_mut(u32_size);
            left.copy_from_slice(&descriptor.as_raw_descriptor().to_ne_bytes()[..]);
            data = right;
        }

        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }
423 
424     /// When intx is enabled, irqfd is used to trigger a level interrupt into guest, resample irqfd
425     /// is used to get guest EOI notification.
426     /// When host hw generates interrupt, vfio irq handler in host kernel receive and handle it,
427     /// this handler disable hw irq first, then trigger irqfd to inject interrupt into guest. When
428     /// resample irqfd is triggered by guest EOI, vfio kernel could enable hw irq, so hw could
429     /// generate another interrupts.
430     /// This function enable resample irqfd and let vfio kernel could get EOI notification.
431     ///
432     /// descriptor: should be resample IrqFd.
resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<(), VfioError>433     pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<(), VfioError> {
434         let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
435         irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
436         irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
437         irq_set[0].index = index;
438         irq_set[0].start = 0;
439         irq_set[0].count = 1;
440 
441         {
442             // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data is
443             // u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
444             // together as u32. It is safe as enough space is reserved through
445             // vec_with_array_field(u32)<1>.
446             let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
447             descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
448         }
449 
450         // Safe as we are the owner of self and irq_set which are valid value
451         let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
452         if ret < 0 {
453             Err(VfioError::VfioIrqEnable(get_error()))
454         } else {
455             Ok(())
456         }
457     }
458 
    /// disable vfio device's irq and disconnect Irqfd Event with device
    pub fn irq_disable(&self, index: u32) -> Result<(), VfioError> {
        // DATA_NONE + ACTION_TRIGGER with count 0 tells the kernel to tear the
        // trigger down; no payload follows the header.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 0;

        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqDisable(get_error()))
        } else {
            Ok(())
        }
    }
476 
    /// Unmask vfio device irq
    pub fn irq_unmask(&self, index: u32) -> Result<(), VfioError> {
        // DATA_NONE unmask of a single sub-index (start 0, count 1); no
        // payload follows the header.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqUnmask(get_error()))
        } else {
            Ok(())
        }
    }
494 
    /// Mask vfio device irq
    pub fn irq_mask(&self, index: u32) -> Result<(), VfioError> {
        // DATA_NONE mask of a single sub-index (start 0, count 1); no payload
        // follows the header.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqMask(get_error()))
        } else {
            Ok(())
        }
    }
512 
    // Queries the kernel for all of the device's region descriptions and
    // caches them as `VfioRegion`s. Regions with an extended info block
    // (argsz larger than the plain struct) get their capability chain walked
    // for sparse-mmap areas and cap-type information.
    #[allow(clippy::cast_ptr_alignment)]
    fn get_regions(dev: &File) -> Result<Vec<VfioRegion>, VfioError> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
        };
        // Safe as we are the owner of dev and dev_info which are valid value,
        // and we verify the return value.
        let mut ret = unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_INFO(), &mut dev_info) };
        // Only PCI devices with at least the config region and MSI-X irq index
        // are supported here.
        if ret < 0
            || (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) == 0
            || dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
            || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
        {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        for i in VFIO_PCI_BAR0_REGION_INDEX..dev_info.num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            // Safe as we are the owner of dev and reg_info which are valid value,
            // and we verify the return value.
            ret = unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO(), &mut reg_info) };
            if ret < 0 {
                // A region index the kernel rejects is simply skipped.
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            // argsz echoed back larger than ours means capability data exists;
            // re-issue the ioctl with a buffer big enough to hold it.
            if reg_info.argsz > argsz {
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // Safe as we are the owner of dev and region_info which are valid value,
                // and we verify the return value.
                ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO(),
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                // region_with_cap[0].cap_info may contain many structures, like
                // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct.
                // Both of them begin with vfio_info_cap_header, so we will get individual cap from
                // vfio_into_cap_header.
                // Go through all the cap structs.
                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                let mut offset = region_with_cap[0].region_info.cap_offset;
                // Each bounds check below guards against a malformed offset or
                // count running past the kernel-reported buffer size.
                while offset != 0 {
                    if offset + cap_header_sz >= region_info_sz {
                        break;
                    }
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    let cap_header =
                        unsafe { &*(cap_ptr as *mut u8 as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz >= region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_sparse_mmap here
                        // Safe, this vfio_region_info_cap_sparse_mmap is in this function allocated
                        // region_with_cap vec.
                        let sparse_mmap = unsafe {
                            &*(cap_ptr as *mut u8 as *const vfio_region_info_cap_sparse_mmap)
                        };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        // Safe, these vfio_region_sparse_mmap_area are in this function allocated
                        // region_with_cap vec.
                        let areas =
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_type here
                        // Safe, this vfio_region_info_cap_type is in this function allocated
                        // region_with_cap vec
                        let cap_type_info =
                            unsafe { &*(cap_ptr as *mut u8 as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    }

                    // Follow the chain; a next offset of 0 terminates it.
                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                // No caps: a plain mmap-able region covers its whole size.
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
            };
            regions.push(region);
        }

        Ok(regions)
    }
657 
658     /// get a region's flag
659     /// the return's value may conatin:
660     ///     VFIO_REGION_INFO_FLAG_READ:  region supports read
661     ///     VFIO_REGION_INFO_FLAG_WRITE: region supports write
662     ///     VFIO_REGION_INFO_FLAG_MMAP:  region supports mmap
663     ///     VFIO_REGION_INFO_FLAG_CAPS:  region's info supports caps
get_region_flags(&self, index: u32) -> u32664     pub fn get_region_flags(&self, index: u32) -> u32 {
665         match self.regions.get(index as usize) {
666             Some(v) => v.flags,
667             None => {
668                 warn!("get_region_flags() with invalid index: {}", index);
669                 0
670             }
671         }
672     }
673 
674     /// get a region's offset
675     /// return: Region offset from the start of vfio device descriptor
get_region_offset(&self, index: u32) -> u64676     pub fn get_region_offset(&self, index: u32) -> u64 {
677         match self.regions.get(index as usize) {
678             Some(v) => v.offset,
679             None => {
680                 warn!("get_region_offset with invalid index: {}", index);
681                 0
682             }
683         }
684     }
685 
686     /// get a region's mmap info vector
get_region_mmap(&self, index: u32) -> Vec<vfio_region_sparse_mmap_area>687     pub fn get_region_mmap(&self, index: u32) -> Vec<vfio_region_sparse_mmap_area> {
688         match self.regions.get(index as usize) {
689             Some(v) => v.mmaps.clone(),
690             None => {
691                 warn!("get_region_mmap with invalid index: {}", index);
692                 Vec::new()
693             }
694         }
695     }
696 
697     /// find the specified cap type in device regions
698     /// Input:
699     ///      type_:  cap type
700     ///      sub_type: cap sub_type
701     /// Output:
702     ///     None: device doesn't have the specified cap type
703     ///     Some((bar_index, region_size)): device has the specified cap type, return region's
704     ///                                     index and size
get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)>705     pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
706         for (index, region) in self.regions.iter().enumerate() {
707             if let Some(cap_info) = &region.cap_info {
708                 if cap_info.0 == type_ && cap_info.1 == sub_type {
709                     return Some((index as u32, region.size));
710                 }
711             }
712         }
713 
714         None
715     }
716 
717     /// Read region's data from VFIO device into buf
718     /// index: region num
719     /// buf: data destination and buf length is read size
720     /// addr: offset in the region
region_read(&self, index: u32, buf: &mut [u8], addr: u64)721     pub fn region_read(&self, index: u32, buf: &mut [u8], addr: u64) {
722         let stub: &VfioRegion;
723         match self.regions.get(index as usize) {
724             Some(v) => stub = v,
725             None => {
726                 warn!("region read with invalid index: {}", index);
727                 return;
728             }
729         }
730 
731         let size = buf.len() as u64;
732         if size > stub.size || addr + size > stub.size {
733             warn!(
734                 "region read with invalid parameter, index: {}, add: {:x}, size: {:x}",
735                 index, addr, size
736             );
737             return;
738         }
739 
740         if let Err(e) = self.dev.read_exact_at(buf, stub.offset + addr) {
741             warn!(
742                 "Failed to read region in index: {}, addr: {:x}, error: {}",
743                 index, addr, e
744             );
745         }
746     }
747 
748     /// write the data from buf into a vfio device region
749     /// index: region num
750     /// buf: data src and buf length is write size
751     /// addr: offset in the region
region_write(&self, index: u32, buf: &[u8], addr: u64)752     pub fn region_write(&self, index: u32, buf: &[u8], addr: u64) {
753         let stub: &VfioRegion;
754         match self.regions.get(index as usize) {
755             Some(v) => stub = v,
756             None => {
757                 warn!("region write with invalid index: {}", index);
758                 return;
759             }
760         }
761 
762         let size = buf.len() as u64;
763         if size > stub.size
764             || addr + size > stub.size
765             || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
766         {
767             warn!(
768                 "region write with invalid parameter,indxe: {}, add: {:x}, size: {:x}",
769                 index, addr, size
770             );
771             return;
772         }
773 
774         if let Err(e) = self.dev.write_all_at(buf, stub.offset + addr) {
775             warn!(
776                 "Failed to write region in index: {}, addr: {:x}, error: {}",
777                 index, addr, e
778             );
779         }
780     }
781 
782     /// get vfio device's descriptors which are passed into minijail process
keep_rds(&self) -> Vec<RawDescriptor>783     pub fn keep_rds(&self) -> Vec<RawDescriptor> {
784         let mut rds = Vec::new();
785         rds.push(self.dev.as_raw_descriptor());
786         rds.push(self.group_descriptor);
787         rds.push(self.container.lock().as_raw_descriptor());
788         rds
789     }
790 
    /// Add (iova, user_addr) map into vfio container iommu table
    /// (delegates to the `VfioContainer` shared behind `self.container`).
    ///
    /// # Safety
    ///
    /// The caller must ensure the host memory at `user_addr` remains valid and
    /// mapped for at least `size` bytes for the lifetime of the DMA mapping
    /// (presumably until a matching `vfio_dma_unmap` — confirm against
    /// `VfioContainer::vfio_dma_map`'s contract; only the delegation is
    /// visible here).
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
    ) -> Result<(), VfioError> {
        self.container.lock().vfio_dma_map(iova, size, user_addr)
    }
800 
    /// Remove (iova, user_addr) map from vfio container iommu table
    /// (delegates to the `VfioContainer` shared behind `self.container`;
    /// errors from the container are propagated to the caller).
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioError> {
        self.container.lock().vfio_dma_unmap(iova, size)
    }
805 
    /// Gets the vfio device backing `File`.
    ///
    /// This is a borrowed reference to the same descriptor used by
    /// `region_read`/`region_write`; the caller must not close it.
    pub fn device_file(&self) -> &File {
        &self.dev
    }
810 }
811