// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::cell::RefCell;
use std::collections::HashMap;
use std::ffi::CString;
use std::fs::{File, OpenOptions};
use std::io;
use std::mem;
use std::os::raw::c_ulong;
use std::os::unix::prelude::FileExt;
use std::path::{Path, PathBuf};
use std::slice;
use std::sync::Arc;
use std::u32;

use crate::IommuDevType;
use base::error;
use base::{
    ioctl, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val,
    warn, AsRawDescriptor, Error, Event, FromRawDescriptor, RawDescriptor, SafeDescriptor,
};
use data_model::{vec_with_array_field, DataInit};
use hypervisor::{DeviceKind, Vm};
use once_cell::sync::OnceCell;
use remain::sorted;
use resources::address_allocator::AddressAllocator;
use resources::{Alloc, Error as ResourcesError};
use sync::Mutex;
use thiserror::Error;
use vfio_sys::*;
#[sorted]
#[derive(Error, Debug)]
pub enum VfioError {
    #[error("failed to borrow global vfio container")]
    BorrowVfioContainer,
    #[error("failed to duplicate VfioContainer")]
    ContainerDupError,
    #[error("failed to set container's IOMMU driver type as VfioType1V2: {0}")]
    ContainerSetIOMMU(Error),
    #[error("failed to create KVM vfio device: {0}")]
    CreateVfioKvmDevice(Error),
    #[error("failed to get Group Status: {0}")]
    GetGroupStatus(Error),
    #[error("failed to get vfio device fd: {0}")]
    GroupGetDeviceFD(Error),
    #[error("failed to add vfio group into vfio container: {0}")]
    GroupSetContainer(Error),
    #[error("group is not viable")]
    GroupViable,
    #[error("invalid region index: {0}")]
    InvalidIndex(u32),
    #[error("invalid file path")]
    InvalidPath,
    #[error("failed to add guest memory map into iommu table: {0}")]
    IommuDmaMap(Error),
    #[error("failed to remove guest memory map from iommu table: {0}")]
    IommuDmaUnmap(Error),
    #[error("failed to get IOMMU cap info from host")]
    IommuGetCapInfo,
    #[error("failed to get IOMMU info from host: {0}")]
    IommuGetInfo(Error),
    #[error("failed to set KVM vfio device's attribute: {0}")]
    KvmSetDeviceAttr(Error),
    #[error("AddressAllocator is unavailable")]
    NoRescAlloc,
    #[error("failed to open /dev/vfio/vfio container: {0}")]
    OpenContainer(io::Error),
    #[error("failed to open /dev/vfio/$group_num group: {0}")]
    OpenGroup(io::Error),
    #[error("resources error: {0}")]
    Resources(ResourcesError),
    #[error(
        "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
    )]
    VfioApiVersion,
    #[error("failed to get vfio device's info or info doesn't match: {0}")]
    VfioDeviceGetInfo(Error),
    #[error("failed to get vfio device's region info: {0}")]
    VfioDeviceGetRegionInfo(Error),
    #[error("failed to disable vfio device's irq: {0}")]
    VfioIrqDisable(Error),
    #[error("failed to enable vfio device's irq: {0}")]
    VfioIrqEnable(Error),
    #[error("failed to mask vfio device's irq: {0}")]
    VfioIrqMask(Error),
    #[error("failed to unmask vfio device's irq: {0}")]
    VfioIrqUnmask(Error),
    #[error("container doesn't support VfioType1V2 IOMMU driver type")]
    VfioType1V2,
}

type Result<T> = std::result::Result<T, VfioError>;

fn get_error() -> Error {
    Error::last()
}

static KVM_VFIO_FILE: OnceCell<SafeDescriptor> = OnceCell::new();

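/// Operation to perform on a VFIO group via the KVM VFIO pseudo-device
/// (KVM_DEV_VFIO_GROUP_ADD / KVM_DEV_VFIO_GROUP_DEL).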
enum KvmVfioGroupOps {
    Add,
    Delete,
}

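/// IOMMU driver types that can be requested for a VFIO container. Only the
/// Type1 v2 driver is supported here.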
#[repr(u32)]
enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
}

/// VfioContainer contains multiple VfioGroups and delegates an IOMMU domain table.
pub struct VfioContainer {
    container: File,
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
}

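/// Copies a `T` out of the raw byte buffer returned by the kernel, starting at
/// `offset`. Panics if the buffer is too short to hold a full `T`.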
fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> T
where
    T: DataInit,
{
    T::from_reader(&bytes[offset..(offset + mem::size_of::<T>())]).expect("malformed kernel data")
}

const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
    pub fn new() -> Result<Self> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION()) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
        })
    }

    // Construct a VfioContainer from an existing container file.
    pub fn new_from_container(container: File) -> Result<Self> {
        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION()) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
        })
    }

    fn is_group_set(&self, group_id: u32) -> bool {
        self.groups.get(&group_id).is_some()
    }

    fn check_extension(&self, val: IommuType) -> bool {
        // Safe as file is vfio container and we make sure val is valid.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION(), val as c_ulong) };
        ret == 1
    }

    fn set_iommu(&self, val: IommuType) -> i32 {
        // Safe as file is vfio container and we make sure val is valid.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU(), val as c_ulong) }
    }

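    /// Maps `size` bytes starting at `user_addr` into the container's IOMMU
    /// table at `iova`.
    ///
    /// # Safety
    ///
    /// The caller must ensure `user_addr` points to a valid, mapped region of
    /// at least `size` bytes that stays alive for the lifetime of the DMA
    /// mapping. (This safety note is an editorial addition, not original
    /// documentation.)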
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA(), &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
            ..Default::default()
        };

        // Safe as file is vfio container, dma_unmap is constructed by us, and
        // we check the return value.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA(), &mut dma_unmap) };
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        let mut iommu_info = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // Safe as file is vfio container, iommu_info has valid values,
        // and we check the return value.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO(), &mut iommu_info) };
        if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        Ok(iommu_info.iova_pgsizes)
    }

    pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<vfio_iova_range>> {
        // Query the buffer size needed to fetch the capabilities.
        let mut iommu_info_argsz = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // Safe as file is vfio container, iommu_info_argsz has valid values,
        // and we check the return value.
        let ret =
            unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO(), &mut iommu_info_argsz) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
            iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
        );
        iommu_info[0].argsz = iommu_info_argsz.argsz;
        // Safe as file is vfio container, iommu_info has valid values,
        // and we check the return value.
        let ret =
            unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO(), iommu_info.as_mut_ptr()) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        // Safe because we initialized iommu_info with enough space, u8 has no
        // alignment requirement, and the buffer is no longer mutated.
        let info_bytes = unsafe {
            std::slice::from_raw_parts(
                iommu_info.as_ptr() as *const u8,
                iommu_info_argsz.argsz as usize,
            )
        };

        if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

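        // Walk the capability chain until the IOVA range capability is found,
        // then copy out each advertised IOVA range.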
        let mut offset = iommu_info[0].cap_offset as usize;
        while offset != 0 {
            let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset);

            if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
                let iova_header = extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
                    info_bytes, offset,
                );
                let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
                let mut ret = Vec::new();
                for i in 0..iova_header.nr_iovas {
                    ret.push(extract_vfio_struct::<vfio_iova_range>(
                        info_bytes,
                        range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
                    ));
                }
                return Ok(ret);
            }
            offset = header.next as usize;
        }

        Err(VfioError::IommuGetCapInfo)
    }

    fn init_vfio_iommu(&mut self) -> Result<()> {
        if !self.check_extension(IommuType::Type1V2) {
            return Err(VfioError::VfioType1V2);
        }

        if self.set_iommu(IommuType::Type1V2) < 0 {
            return Err(VfioError::ContainerSetIOMMU(get_error()));
        }

        Ok(())
    }

    fn get_group_with_vm(
        &mut self,
        id: u32,
        vm: &impl Vm,
        iommu_enabled: bool,
    ) -> Result<Arc<Mutex<VfioGroup>>> {
        match self.groups.get(&id) {
            Some(group) => Ok(group.clone()),
            None => {
                let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
                if self.groups.is_empty() {
                    // Before the first group is added into the container, do the
                    // once-per-container initialization.
                    self.init_vfio_iommu()?;

                    if !iommu_enabled {
                        vm.get_memory().with_regions(
                            |_index, guest_addr, size, host_addr, _mmap, _fd_offset| {
                                // Safe because the guest regions are guaranteed not to overlap.
                                unsafe {
                                    self.vfio_dma_map(
                                        guest_addr.0,
                                        size as u64,
                                        host_addr as u64,
                                        true,
                                    )
                                }
                            },
                        )?;
                    }
                }

                let kvm_vfio_file = KVM_VFIO_FILE
                    .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
                    .map_err(VfioError::CreateVfioKvmDevice)?;
                group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;

                self.groups.insert(id, group.clone());

                Ok(group)
            }
        }
    }

    fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
        match self.groups.get(&id) {
            Some(group) => Ok(group.clone()),
            None => {
                let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));

                if self.groups.is_empty() {
                    // Before the first group is added into the container, do the
                    // once-per-container initialization.
                    self.init_vfio_iommu()?;
                }

                self.groups.insert(id, group.clone());
                Ok(group)
            }
        }
    }

    fn remove_group(&mut self, id: u32, reduce: bool) {
        let mut remove = false;

        if let Some(group) = self.groups.get(&id) {
            if reduce {
                group.lock().reduce_device_num();
            }
            if group.lock().device_num() == 0 {
                let kvm_vfio_file = KVM_VFIO_FILE.get().expect("kvm vfio file isn't created");
                if group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
                    .is_err()
                {
                    warn!("failed to remove vfio group from kvm device");
                }
                remove = true;
            }
        }

        if remove {
            self.groups.remove(&id);
        }
    }

    pub fn into_raw_descriptor(&self) -> Result<RawDescriptor> {
        // Safe as the descriptor is dup'd from a valid container file and the
        // return value is checked.
        let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
        if raw_descriptor < 0 {
            Err(VfioError::ContainerDupError)
        } else {
            Ok(raw_descriptor)
        }
    }
}

impl AsRawDescriptor for VfioContainer {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}

struct VfioGroup {
    group: File,
    device_num: u32,
}

impl VfioGroup {
    fn new(container: &VfioContainer, id: u32) -> Result<Self> {
        let group_path = format!("/dev/vfio/{}", id);
        let group_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(Path::new(&group_path))
            .map_err(VfioError::OpenGroup)?;

        let mut group_status = vfio_group_status {
            argsz: mem::size_of::<vfio_group_status>() as u32,
            flags: 0,
        };
        // Safe as we are the owner of group_file and group_status, which are valid values.
        let mut ret =
            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS(), &mut group_status) };
        if ret < 0 {
            return Err(VfioError::GetGroupStatus(get_error()));
        }

        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
            return Err(VfioError::GroupViable);
        }

        // Safe as we are the owner of group_file and container_raw_descriptor, which are valid
        // values, and we verify the return value.
        let container_raw_descriptor = container.as_raw_descriptor();
        ret = unsafe {
            ioctl_with_ref(
                &group_file,
                VFIO_GROUP_SET_CONTAINER(),
                &container_raw_descriptor,
            )
        };
        if ret < 0 {
            return Err(VfioError::GroupSetContainer(get_error()));
        }

        Ok(VfioGroup {
            group: group_file,
            device_num: 0,
        })
    }

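    /// Parses the VFIO group id from the `iommu_group` symlink under the
    /// device's sysfs path.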
    fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path.read_link().map_err(|_| VfioError::InvalidPath)?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        Ok(group_id)
    }

    fn kvm_device_set_group(
        &self,
        kvm_vfio_file: &SafeDescriptor,
        ops: KvmVfioGroupOps,
    ) -> Result<()> {
        let group_descriptor = self.as_raw_descriptor();
        let group_descriptor_ptr = &group_descriptor as *const i32;
        let vfio_dev_attr = match ops {
            KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
                addr: group_descriptor_ptr as u64,
            },
            KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
                addr: group_descriptor_ptr as u64,
            },
        };

        // Safe as we are the owner of kvm_vfio_file and vfio_dev_attr, which are valid values,
        // and we verify the return value.
        if 0 != unsafe {
            ioctl_with_ref(
                kvm_vfio_file,
                kvm_sys::KVM_SET_DEVICE_ATTR(),
                &vfio_dev_attr,
            )
        } {
            return Err(VfioError::KvmSetDeviceAttr(get_error()));
        }

        Ok(())
    }

    fn get_device(&self, name: &str) -> Result<File> {
        let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
        let path_ptr = path.as_ptr();

        // Safe as we are the owner of self and path_ptr, which are valid values.
        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD(), path_ptr) };
        if ret < 0 {
            return Err(VfioError::GroupGetDeviceFD(get_error()));
        }

        // Safe as ret is a valid FD.
        Ok(unsafe { File::from_raw_descriptor(ret) })
    }

    fn add_device_num(&mut self) {
        self.device_num += 1;
    }

    fn reduce_device_num(&mut self) {
        self.device_num -= 1;
    }

    fn device_num(&self) -> u32 {
        self.device_num
    }
}

impl AsRawDescriptor for VfioGroup {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}

/// A helper trait for managing VFIO setup
pub trait VfioCommonTrait: Send + Sync {
    /// The single place to create a VFIO container for a PCI endpoint.
    ///
    /// The policy to determine whether an individual or a shared VFIO container
    /// will be created for this device is governed by the physical PCI topology
    /// and the argument iommu_dev.
    ///
    /// # Arguments
    ///
    /// * `iommu_dev` - the type of IOMMU device, if any, this device attaches to
    /// * `sysfspath` - the path to the PCI device, e.g. /sys/bus/pci/devices/0000:02:00.0
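    ///
    /// # Example
    ///
    /// A minimal sketch (an editorial illustration, not original docs) of
    /// fetching the shared container for a device behind the virtio IOMMU; the
    /// sysfs path is hypothetical:
    ///
    /// ```ignore
    /// let container = VfioCommonSetup::vfio_get_container(
    ///     IommuDevType::VirtioIommu,
    ///     Some(Path::new("/sys/bus/pci/devices/0000:02:00.0")),
    /// )?;
    /// ```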
    fn vfio_get_container<P: AsRef<Path>>(
        iommu_dev: IommuDevType,
        sysfspath: Option<P>,
    ) -> Result<Arc<Mutex<VfioContainer>>>;
}

thread_local! {

    // One VFIO container is shared by all VFIO devices that don't
    // attach to the virtio IOMMU device
    static NO_IOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);

    // For IOMMU enabled devices, all VFIO groups that share the same IOVA space
    // are managed by one VFIO container
    static IOMMU_CONTAINERS: RefCell<Option<Vec<Arc<Mutex<VfioContainer>>>>> = RefCell::new(Some(Default::default()));

    // One VFIO container is shared by all VFIO devices that
    // attach to the CoIOMMU device
    static COIOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);
}

pub struct VfioCommonSetup;

impl VfioCommonTrait for VfioCommonSetup {
    fn vfio_get_container<P: AsRef<Path>>(
        iommu_dev: IommuDevType,
        sysfspath: Option<P>,
    ) -> Result<Arc<Mutex<VfioContainer>>> {
        match iommu_dev {
            IommuDevType::NoIommu => {
                // One VFIO container is used for all IOMMU disabled groups.
                NO_IOMMU_CONTAINER.with(|v| {
                    if v.borrow().is_some() {
                        if let Some(ref container) = *v.borrow() {
                            Ok(container.clone())
                        } else {
                            Err(VfioError::BorrowVfioContainer)
                        }
                    } else {
                        let container = Arc::new(Mutex::new(VfioContainer::new()?));
                        *v.borrow_mut() = Some(container.clone());
                        Ok(container)
                    }
                })
            }
            IommuDevType::VirtioIommu => {
                let path = sysfspath.ok_or(VfioError::InvalidPath)?;
                let group_id = VfioGroup::get_group_id(path)?;

                // One VFIO container is used for all devices belonging to one VFIO group.
                IOMMU_CONTAINERS.with(|v| {
                    if let Some(ref mut containers) = *v.borrow_mut() {
                        let container = containers
                            .iter()
                            .find(|container| container.lock().is_group_set(group_id));

                        match container {
                            None => {
                                let container = Arc::new(Mutex::new(VfioContainer::new()?));
                                containers.push(container.clone());
                                Ok(container)
                            }
                            Some(container) => Ok(container.clone()),
                        }
                    } else {
                        Err(VfioError::BorrowVfioContainer)
                    }
                })
            }
            IommuDevType::CoIommu => {
                // One VFIO container is used for all devices attached to the CoIommu device.
                COIOMMU_CONTAINER.with(|v| {
                    if v.borrow().is_some() {
                        if let Some(ref container) = *v.borrow() {
                            Ok(container.clone())
                        } else {
                            Err(VfioError::BorrowVfioContainer)
                        }
                    } else {
                        let container = Arc::new(Mutex::new(VfioContainer::new()?));
                        *v.borrow_mut() = Some(container.clone());
                        Ok(container)
                    }
                })
            }
        }
    }
}

/// Vfio Irq type used to enable/disable/mask/unmask vfio irq
pub enum VfioIrqType {
    Intx,
    Msi,
    Msix,
}

/// Vfio Irq information used to assign and enable/disable/mask/unmask vfio irq
pub struct VfioIrq {
    pub flags: u32,
    pub index: u32,
}

/// Address on VFIO memory region.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    /// region number.
    pub index: u32,
    /// offset in the region.
    pub addr: u64,
}

#[derive(Debug)]
pub struct VfioRegion {
    // flags for this region: read/write/mmap
    flags: u32,
    size: u64,
    // region offset used to read/write with vfio device descriptor
    offset: u64,
    // vectors for mmap offset and size
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // type and subtype for cap type
    cap_info: Option<(u32, u32)>,
}

/// Vfio device for exposing regions which can be read from or written to through the kernel
/// vfio device.
pub struct VfioDevice {
    dev: File,
    name: String,
    container: Arc<Mutex<VfioContainer>>,
    group_descriptor: RawDescriptor,
    group_id: u32,
    // vec for vfio device's regions
    regions: Vec<VfioRegion>,

    iova_alloc: Option<Arc<Mutex<AddressAllocator>>>,
}

impl VfioDevice {
    /// Create a new vfio device; guest reads/writes on this device are then
    /// transferred into the kernel vfio driver.
    /// `sysfspath` specifies the vfio device path in the sys file system.
    pub fn new_passthrough<P: AsRef<Path>>(
        sysfspath: &P,
        vm: &impl Vm,
        container: Arc<Mutex<VfioContainer>>,
        iommu_enabled: bool,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(&sysfspath)?;

        let group = container
            .lock()
            .get_group_with_vm(group_id, vm, iommu_enabled)?;
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.lock().get_device(&name)?;
        let regions = Self::get_regions(&dev)?;
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        Ok(VfioDevice {
            dev,
            name,
            container,
            group_descriptor,
            group_id,
            regions,
            iova_alloc: None,
        })
    }

    pub fn new<P: AsRef<Path>>(
        sysfspath: &P,
        container: Arc<Mutex<VfioContainer>>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(&sysfspath)?;
        let group = container.lock().get_group(group_id)?;
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);

        let dev = match group.lock().get_device(&name) {
            Ok(dev) => dev,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let regions = match Self::get_regions(&dev) {
            Ok(regions) => regions,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        let iova_ranges = container
            .lock()
            .vfio_iommu_iova_get_iova_ranges()?
            .into_iter()
            .map(|r| std::ops::RangeInclusive::new(r.start, r.end));
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        Ok(VfioDevice {
            dev,
            name,
            container,
            group_descriptor,
            group_id,
            regions,
            iova_alloc: Some(Arc::new(Mutex::new(iova_alloc))),
        })
    }

    /// Returns the file for this device.
    pub fn dev_file(&self) -> &File {
        &self.dev
    }

    /// Returns PCI device name, formatted as BUS:DEVICE.FUNCTION string.
    pub fn device_name(&self) -> &String {
        &self.name
    }

    /// Enable vfio device's irq and associate an irqfd Event with the device.
    /// When MSI-X is enabled, multiple vectors are supported: the vectors from `subindex` to
    /// `subindex` + the length of `descriptors` are assigned the irqfds in the `descriptors`
    /// array.
    /// When index = VFIO_PCI_REQ_IRQ_INDEX, kernel vfio will trigger this event when the physical
    /// device is removed.
    /// If a descriptor is None, -1 is assigned to the irq. A value of -1 is used to either
    /// de-assign interrupts if already assigned or skip un-assigned interrupts.
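    ///
    /// # Example
    ///
    /// A minimal sketch (an editorial illustration, not original docs) of
    /// assigning two MSI-X vectors starting at vector 0; `msix_evt0` and
    /// `msix_evt1` are hypothetical `Event`s:
    ///
    /// ```ignore
    /// device.irq_enable(&[Some(&msix_evt0), Some(&msix_evt1)], VFIO_PCI_MSIX_IRQ_INDEX, 0)?;
    /// ```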
    pub fn irq_enable(
        &self,
        descriptors: &[Option<&Event>],
        index: u32,
        subindex: u32,
    ) -> Result<()> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = subindex;
        irq_set[0].count = count as u32;

        // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
        // defaults to u8; here each entry of irq_set.data is a descriptor as u32, so 4 u8s are
        // combined together into one u32. It is safe as enough space is reserved through
        // vec_with_array_field(u32)<count>.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        for descriptor in descriptors.iter().take(count) {
            let (left, right) = data.split_at_mut(u32_size);
            match descriptor {
                Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
                None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
            }
            data = right;
        }

        // Safe as we are the owner of self and irq_set, which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }

    /// When INTx is enabled, the irqfd is used to trigger a level interrupt into the guest, and a
    /// resample irqfd is used to get the guest's EOI notification.
    /// When the host hardware generates an interrupt, the vfio irq handler in the host kernel
    /// receives and handles it: the handler disables the hw irq first, then triggers the irqfd to
    /// inject the interrupt into the guest. When the resample irqfd is triggered by the guest's
    /// EOI, the vfio kernel driver can re-enable the hw irq, so the hardware can generate further
    /// interrupts.
    /// This function enables the resample irqfd, letting the vfio kernel driver get EOI
    /// notifications.
    ///
    /// descriptor: should be the resample IrqFd.
    pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        {
            // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
            // defaults to u8; here irq_set.data is a descriptor as u32, so 4 u8s are combined
            // together into one u32. It is safe as enough space is reserved through
            // vec_with_array_field(u32)<1>.
            let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
            descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
        }

        // Safe as we are the owner of self and irq_set, which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }

    /// Disable vfio device's irq and disconnect the irqfd Event from the device.
    pub fn irq_disable(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 0;

        // Safe as we are the owner of self and irq_set, which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqDisable(get_error()))
        } else {
            Ok(())
        }
    }

    /// Unmask vfio device irq
    pub fn irq_unmask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // Safe as we are the owner of self and irq_set, which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqUnmask(get_error()))
        } else {
            Ok(())
        }
    }

    /// Mask vfio device irq
    pub fn irq_mask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // Safe as we are the owner of self and irq_set, which are valid values.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqMask(get_error()))
        } else {
            Ok(())
        }
    }

    fn validate_dev_info(dev_info: &mut vfio_device_info) -> Result<()> {
        if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
            if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
                || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
            {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }
            return Ok(());
        } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
            return Ok(());
        }

        Err(VfioError::VfioDeviceGetInfo(get_error()))
    }

    /// Get and validate VFIO device information.
    pub fn check_device_info(&self) -> Result<vfio_device_info> {
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };

        // Safe as we are the owner of device_file and dev_info, which are valid values,
        // and we verify the return value.
        let ret = unsafe {
            ioctl_with_mut_ref(self.device_file(), VFIO_DEVICE_GET_INFO(), &mut dev_info)
        };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        Self::validate_dev_info(&mut dev_info)?;
        Ok(dev_info)
    }

    /// Query interrupt information.
    /// return: Vector of interrupt information, each entry containing flags and index.
    pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
        let dev_info = self.check_device_info()?;
        let mut irqs: Vec<VfioIrq> = Vec::new();

        for i in 0..dev_info.num_irqs {
            let argsz = mem::size_of::<vfio_irq_info>() as u32;
            let mut irq_info = vfio_irq_info {
                argsz,
                flags: 0,
                index: i,
                count: 0,
            };
            // Safe as we are the owner of self and irq_info, which are valid values,
            // and we verify the return value.
            let ret = unsafe {
                ioctl_with_mut_ref(
                    self.device_file(),
                    VFIO_DEVICE_GET_IRQ_INFO(),
                    &mut irq_info,
                )
            };
            if ret < 0 || irq_info.count != 1 {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            let irq = VfioIrq {
                flags: irq_info.flags,
                index: irq_info.index,
            };
            irqs.push(irq);
        }
        Ok(irqs)
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn get_regions(dev: &File) -> Result<Vec<VfioRegion>> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };
        // Safe as we are the owner of dev and dev_info, which are valid values,
        // and we verify the return value.
        let mut ret = unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_INFO(), &mut dev_info) };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        Self::validate_dev_info(&mut dev_info)?;
        for i in 0..dev_info.num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            // Safe as we are the owner of dev and reg_info, which are valid values,
            // and we verify the return value.
            ret = unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO(), &mut reg_info) };
            if ret < 0 {
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            if reg_info.argsz > argsz {
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // Safe as we are the owner of dev and region_with_cap, which are valid values,
                // and we verify the return value.
                ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO(),
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                // region_with_cap[0].cap_info may contain many structures, like
                // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct.
                // Both of them begin with vfio_info_cap_header, so we will get each individual
                // cap from its vfio_info_cap_header.
                // Go through all the cap structs.
                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                let mut offset = region_with_cap[0].region_info.cap_offset;
                while offset != 0 {
                    if offset + cap_header_sz > region_info_sz {
                        break;
                    }
                    // Safe, as cap_header struct is in this function's allocated
                    // region_with_cap vec.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    let cap_header =
                        unsafe { &*(cap_ptr as *mut u8 as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_sparse_mmap here.
                        // Safe, this vfio_region_info_cap_sparse_mmap is in this function's
                        // allocated region_with_cap vec.
                        let sparse_mmap = unsafe {
                            &*(cap_ptr as *mut u8 as *const vfio_region_info_cap_sparse_mmap)
                        };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        // Safe, these vfio_region_sparse_mmap_area are in this function's
                        // allocated region_with_cap vec.
                        let areas =
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_type here.
                        // Safe, this vfio_region_info_cap_type is in this function's allocated
                        // region_with_cap vec.
                        let cap_type_info =
                            unsafe { &*(cap_ptr as *mut u8 as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                        mmaps.push(vfio_region_sparse_mmap_area {
                            offset: region_with_cap[0].region_info.offset,
                            size: region_with_cap[0].region_info.size,
                        });
                    }

                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
            };
            regions.push(region);
        }

        Ok(regions)
    }

    /// Get a region's flags.
    /// The returned value may contain:
    /// VFIO_REGION_INFO_FLAG_READ: region supports read
    /// VFIO_REGION_INFO_FLAG_WRITE: region supports write
    /// VFIO_REGION_INFO_FLAG_MMAP: region supports mmap
    /// VFIO_REGION_INFO_FLAG_CAPS: region's info supports caps
    pub fn get_region_flags(&self, index: u32) -> u32 {
        match self.regions.get(index as usize) {
            Some(v) => v.flags,
            None => {
                warn!("get_region_flags() with invalid index: {}", index);
                0
            }
        }
    }

    /// Get a region's offset.
    /// return: Region offset from the start of the vfio device descriptor.
    pub fn get_region_offset(&self, index: u32) -> u64 {
        match self.regions.get(index as usize) {
            Some(v) => v.offset,
            None => {
                warn!("get_region_offset with invalid index: {}", index);
                0
            }
        }
    }

    /// Get a region's size.
    /// return: Region size from the start of the vfio device descriptor.
    pub fn get_region_size(&self, index: u32) -> u64 {
        match self.regions.get(index as usize) {
            Some(v) => v.size,
            None => {
                warn!("get_region_size with invalid index: {}", index);
                0
            }
        }
    }

    /// Get the number of regions.
    /// return: Number of regions of the vfio device descriptor.
    pub fn get_region_count(&self) -> u32 {
        self.regions.len() as u32
    }

    /// Get a region's mmap info vector.
    pub fn get_region_mmap(&self, index: u32) -> Vec<vfio_region_sparse_mmap_area> {
        match self.regions.get(index as usize) {
            Some(v) => v.mmaps.clone(),
            None => {
                warn!("get_region_mmap with invalid index: {}", index);
                Vec::new()
            }
        }
    }

    /// Find the specified cap type in device regions.
    /// Input:
    ///     type_: cap type
    ///     sub_type: cap sub_type
    /// Output:
    ///     None: device doesn't have the specified cap type
    ///     Some((bar_index, region_size)): device has the specified cap type; returns the
    ///     region's index and size
    pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
        for (index, region) in self.regions.iter().enumerate() {
            if let Some(cap_info) = &region.cap_info {
                if cap_info.0 == type_ && cap_info.1 == sub_type {
                    return Some((index as u32, region.size));
                }
            }
        }

        None
    }

    /// Returns the file offset corresponding to the given `VfioRegionAddr`.
    /// The offset can be used when reading/writing the VFIO device's FD directly.
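    ///
    /// # Example
    ///
    /// A minimal sketch (an editorial illustration, not original docs); the
    /// region index and offset are hypothetical:
    ///
    /// ```ignore
    /// let addr = VfioRegionAddr { index: 0, addr: 0x10 };
    /// let file_offset = device.get_offset_for_addr(&addr)?;
    /// ```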
    pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
        let region = self
            .regions
            .get(addr.index as usize)
            .ok_or(VfioError::InvalidIndex(addr.index))?;
        Ok(region.offset + addr.addr)
    }

    /// Read region's data from the VFIO device into buf.
    /// index: region number
    /// buf: data destination; buf length is the read size
    /// addr: offset in the region
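    ///
    /// # Example
    ///
    /// A minimal sketch (an editorial illustration, not original docs) reading
    /// 4 bytes from the start of the PCI config region:
    ///
    /// ```ignore
    /// let mut buf = [0u8; 4];
    /// device.region_read(VFIO_PCI_CONFIG_REGION_INDEX, &mut buf, 0);
    /// ```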
    pub fn region_read(&self, index: u32, buf: &mut [u8], addr: u64) {
        let stub: &VfioRegion = self
            .regions
            .get(index as usize)
            .unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {}", index));

        let size = buf.len() as u64;
        if size > stub.size || addr + size > stub.size {
            panic!(
                "tried to read VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
                index, addr, size
            );
        }

        self.dev
            .read_exact_at(buf, stub.offset + addr)
            .unwrap_or_else(|e| {
                panic!(
                    "failed to read region: index={}, addr=0x{:x}, error={}",
                    index, addr, e
                )
            });
    }

    /// Reads a value from the specified `VfioRegionAddr.addr` + `offset`.
    pub fn region_read_from_addr<T: DataInit>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
        let mut val = mem::MaybeUninit::zeroed();
        // Safe because we have zero-initialized `size_of::<T>()` bytes.
        let buf =
            unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
        self.region_read(addr.index, buf, addr.addr + offset);
        // Safe because any bit pattern is valid for a type that implements
        // DataInit.
        unsafe { val.assume_init() }
    }

    /// Write the data from buf into a vfio device region.
    /// index: region number
    /// buf: data source; buf length is the write size
    /// addr: offset in the region
    pub fn region_write(&self, index: u32, buf: &[u8], addr: u64) {
        let stub: &VfioRegion = self
            .regions
            .get(index as usize)
            .unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {}", index));

        let size = buf.len() as u64;
        if size > stub.size
            || addr + size > stub.size
            || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
        {
            panic!(
                "tried to write VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
                index, addr, size
            );
        }

        self.dev
            .write_all_at(buf, stub.offset + addr)
            .unwrap_or_else(|e| {
                panic!(
                    "failed to write region: index={}, addr=0x{:x}, error={}",
                    index, addr, e
                )
            });
    }

    /// Writes data into the specified `VfioRegionAddr.addr` + `offset`.
    pub fn region_write_to_addr<T: DataInit>(&self, val: &T, addr: &VfioRegionAddr, offset: u64) {
        self.region_write(addr.index, val.as_slice(), addr.addr + offset);
    }

    /// Get the vfio device's descriptors, which are passed into the minijail process.
    pub fn keep_rds(&self) -> Vec<RawDescriptor> {
        vec![
            self.dev.as_raw_descriptor(),
            self.group_descriptor,
            self.container.lock().as_raw_descriptor(),
        ]
    }

    /// Add an (iova, user_addr) map into the vfio container's iommu table.
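    ///
    /// # Example
    ///
    /// A minimal sketch (an editorial illustration, not original docs); the
    /// addresses and size are hypothetical, and the caller must uphold the
    /// mapping's safety requirements:
    ///
    /// ```ignore
    /// // Map one 4 KiB page at IOVA 0x1000, backed by host_addr, writable.
    /// unsafe { device.vfio_dma_map(0x1000, 0x1000, host_addr, true) }?;
    /// ```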
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        self.container
            .lock()
            .vfio_dma_map(iova, size, user_addr, write_en)
    }

    /// Remove an (iova, user_addr) map from the vfio container's iommu table.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        self.container.lock().vfio_dma_unmap(iova, size)
    }

    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        self.container.lock().vfio_get_iommu_page_size_mask()
    }

    pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
        match &self.iova_alloc {
            None => Err(VfioError::NoRescAlloc),
            Some(iova_alloc) => iova_alloc
                .lock()
                .allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
                .map_err(VfioError::Resources),
        }
    }

    /// Gets the vfio device backing `File`.
    pub fn device_file(&self) -> &File {
        &self.dev
    }

    /// Close the vfio device.
    pub fn close(&self) {
        self.container.lock().remove_group(self.group_id, true);
    }
}

pub struct VfioPciConfig {
    device: Arc<VfioDevice>,
}

impl VfioPciConfig {
    pub fn new(device: Arc<VfioDevice>) -> Self {
        VfioPciConfig { device }
    }

    /// Reads a value of type `T` from the PCI config space at `offset`.
    pub fn read_config<T: DataInit>(&self, offset: u32) -> T {
        let mut buf = vec![0u8; std::mem::size_of::<T>()];
        self.device
            .region_read(VFIO_PCI_CONFIG_REGION_INDEX, &mut buf, offset.into());
        T::from_slice(&buf)
            .copied()
            .expect("failed to convert config data from slice")
    }

    /// Writes `config` into the PCI config space at `offset`.
    pub fn write_config<T: DataInit>(&self, config: T, offset: u32) {
        self.device.region_write(
            VFIO_PCI_CONFIG_REGION_INDEX,
            config.as_slice(),
            offset.into(),
        );
    }

    /// Set the VFIO device this config refers to as the bus master.
    pub fn set_bus_master(&self) {
        /// Constant definitions from `linux/pci_regs.h`.
        const PCI_COMMAND: u32 = 0x4;
        /// Enable bus mastering
        const PCI_COMMAND_MASTER: u16 = 0x4;

        let mut cmd: u16 = self.read_config(PCI_COMMAND);

        if cmd & PCI_COMMAND_MASTER != 0 {
            return;
        }

        cmd |= PCI_COMMAND_MASTER;

        self.write_config(cmd, PCI_COMMAND);
    }
}

impl AsRawDescriptor for VfioDevice {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.dev.as_raw_descriptor()
    }
}