1 // Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved.
2 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
3 //
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-Google file.
7
8 //! Traits and structs to control Linux in-kernel vhost drivers.
9 //!
10 //! The initial vhost implementation is a part of the Linux kernel and uses ioctl interface to
11 //! communicate with userspace applications. This sub module provides ioctl based interfaces to
//! control the in-kernel net, vsock and vdpa vhost drivers.
13
14 use std::mem;
15 use std::os::unix::io::{AsRawFd, RawFd};
16
17 use libc::{c_void, ssize_t, write};
18
19 use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize};
20 use vmm_sys_util::eventfd::EventFd;
21 use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref};
22
23 use super::{
24 Error, Result, VhostAccess, VhostBackend, VhostIotlbBackend, VhostIotlbMsg,
25 VhostIotlbMsgParser, VhostIotlbType, VhostUserDirtyLogRegion, VhostUserMemoryRegionInfo,
26 VringConfigData, VHOST_MAX_MEMORY_REGIONS,
27 };
28
29 pub mod vhost_binding;
30 use self::vhost_binding::*;
31
32 #[cfg(feature = "vhost-net")]
33 pub mod net;
34 #[cfg(feature = "vhost-vdpa")]
35 pub mod vdpa;
36 #[cfg(feature = "vhost-vsock")]
37 pub mod vsock;
38
39 #[inline]
ioctl_result<T>(rc: i32, res: T) -> Result<T>40 fn ioctl_result<T>(rc: i32, res: T) -> Result<T> {
41 if rc < 0 {
42 Err(Error::IoctlError(std::io::Error::last_os_error()))
43 } else {
44 Ok(res)
45 }
46 }
47
48 #[inline]
io_result<T>(rc: isize, res: T) -> Result<T>49 fn io_result<T>(rc: isize, res: T) -> Result<T> {
50 if rc < 0 {
51 Err(Error::IOError(std::io::Error::last_os_error()))
52 } else {
53 Ok(res)
54 }
55 }
56
57 /// Represent an in-kernel vhost device backend.
58 pub trait VhostKernBackend: AsRawFd {
59 /// Associated type to access guest memory.
60 type AS: GuestAddressSpace;
61
62 /// Get the object to access the guest's memory.
mem(&self) -> &Self::AS63 fn mem(&self) -> &Self::AS;
64
65 /// Check whether the ring configuration is valid.
is_valid(&self, config_data: &VringConfigData) -> bool66 fn is_valid(&self, config_data: &VringConfigData) -> bool {
67 let queue_size = config_data.queue_size;
68 if queue_size > config_data.queue_max_size
69 || queue_size == 0
70 || (queue_size & (queue_size - 1)) != 0
71 {
72 return false;
73 }
74
75 let m = self.mem().memory();
76 let desc_table_size = 16 * u64::from(queue_size) as GuestUsize;
77 let avail_ring_size = 6 + 2 * u64::from(queue_size) as GuestUsize;
78 let used_ring_size = 6 + 8 * u64::from(queue_size) as GuestUsize;
79 if GuestAddress(config_data.desc_table_addr)
80 .checked_add(desc_table_size)
81 .map_or(true, |v| !m.address_in_range(v))
82 {
83 return false;
84 }
85 if GuestAddress(config_data.avail_ring_addr)
86 .checked_add(avail_ring_size)
87 .map_or(true, |v| !m.address_in_range(v))
88 {
89 return false;
90 }
91 if GuestAddress(config_data.used_ring_addr)
92 .checked_add(used_ring_size)
93 .map_or(true, |v| !m.address_in_range(v))
94 {
95 return false;
96 }
97
98 config_data.is_log_addr_valid()
99 }
100 }
101
102 impl<T: VhostKernBackend> VhostBackend for T {
103 /// Get a bitmask of supported virtio/vhost features.
get_features(&self) -> Result<u64>104 fn get_features(&self) -> Result<u64> {
105 let mut avail_features: u64 = 0;
106 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
107 let ret = unsafe { ioctl_with_mut_ref(self, VHOST_GET_FEATURES(), &mut avail_features) };
108 ioctl_result(ret, avail_features)
109 }
110
111 /// Inform the vhost subsystem which features to enable. This should be a subset of
112 /// supported features from VHOST_GET_FEATURES.
113 ///
114 /// # Arguments
115 /// * `features` - Bitmask of features to set.
set_features(&self, features: u64) -> Result<()>116 fn set_features(&self, features: u64) -> Result<()> {
117 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
118 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_FEATURES(), &features) };
119 ioctl_result(ret, ())
120 }
121
122 /// Set the current process as the owner of this file descriptor.
123 /// This must be run before any other vhost ioctls.
set_owner(&self) -> Result<()>124 fn set_owner(&self) -> Result<()> {
125 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
126 let ret = unsafe { ioctl(self, VHOST_SET_OWNER()) };
127 ioctl_result(ret, ())
128 }
129
reset_owner(&self) -> Result<()>130 fn reset_owner(&self) -> Result<()> {
131 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
132 let ret = unsafe { ioctl(self, VHOST_RESET_OWNER()) };
133 ioctl_result(ret, ())
134 }
135
136 /// Set the guest memory mappings for vhost to use.
set_mem_table(&self, regions: &[VhostUserMemoryRegionInfo]) -> Result<()>137 fn set_mem_table(&self, regions: &[VhostUserMemoryRegionInfo]) -> Result<()> {
138 if regions.is_empty() || regions.len() > VHOST_MAX_MEMORY_REGIONS {
139 return Err(Error::InvalidGuestMemory);
140 }
141
142 let mut vhost_memory = VhostMemory::new(regions.len() as u16);
143 for (index, region) in regions.iter().enumerate() {
144 vhost_memory.set_region(
145 index as u32,
146 &vhost_memory_region {
147 guest_phys_addr: region.guest_phys_addr,
148 memory_size: region.memory_size,
149 userspace_addr: region.userspace_addr,
150 flags_padding: 0u64,
151 },
152 )?;
153 }
154
155 // SAFETY: This ioctl is called with a pointer that is valid for the lifetime
156 // of this function. The kernel will make its own copy of the memory
157 // tables. As always, check the return value.
158 let ret = unsafe { ioctl_with_ptr(self, VHOST_SET_MEM_TABLE(), vhost_memory.as_ptr()) };
159 ioctl_result(ret, ())
160 }
161
162 /// Set base address for page modification logging.
163 ///
164 /// # Arguments
165 /// * `base` - Base address for page modification logging.
set_log_base(&self, base: u64, region: Option<VhostUserDirtyLogRegion>) -> Result<()>166 fn set_log_base(&self, base: u64, region: Option<VhostUserDirtyLogRegion>) -> Result<()> {
167 if region.is_some() {
168 return Err(Error::LogAddress);
169 }
170
171 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
172 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_LOG_BASE(), &base) };
173 ioctl_result(ret, ())
174 }
175
176 /// Specify an eventfd file descriptor to signal on log write.
set_log_fd(&self, fd: RawFd) -> Result<()>177 fn set_log_fd(&self, fd: RawFd) -> Result<()> {
178 let val: i32 = fd;
179 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
180 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_LOG_FD(), &val) };
181 ioctl_result(ret, ())
182 }
183
184 /// Set the number of descriptors in the vring.
185 ///
186 /// # Arguments
187 /// * `queue_index` - Index of the queue to set descriptor count for.
188 /// * `num` - Number of descriptors in the queue.
set_vring_num(&self, queue_index: usize, num: u16) -> Result<()>189 fn set_vring_num(&self, queue_index: usize, num: u16) -> Result<()> {
190 let vring_state = vhost_vring_state {
191 index: queue_index as u32,
192 num: u32::from(num),
193 };
194
195 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
196 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_NUM(), &vring_state) };
197 ioctl_result(ret, ())
198 }
199
200 /// Set the addresses for a given vring.
201 ///
202 /// # Arguments
203 /// * `queue_index` - Index of the queue to set addresses for.
204 /// * `config_data` - Vring config data, addresses of desc_table, avail_ring
205 /// and used_ring are in the guest address space.
set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()>206 fn set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()> {
207 if !self.is_valid(config_data) {
208 return Err(Error::InvalidQueue);
209 }
210
211 // The addresses are converted into the host address space.
212 let vring_addr = config_data.to_vhost_vring_addr(queue_index, self.mem())?;
213
214 // SAFETY: This ioctl is called on a valid vhost fd and has its
215 // return value checked.
216 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ADDR(), &vring_addr) };
217 ioctl_result(ret, ())
218 }
219
220 /// Set the first index to look for available descriptors.
221 ///
222 /// # Arguments
223 /// * `queue_index` - Index of the queue to modify.
224 /// * `num` - Index where available descriptors start.
set_vring_base(&self, queue_index: usize, base: u16) -> Result<()>225 fn set_vring_base(&self, queue_index: usize, base: u16) -> Result<()> {
226 let vring_state = vhost_vring_state {
227 index: queue_index as u32,
228 num: u32::from(base),
229 };
230
231 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
232 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_BASE(), &vring_state) };
233 ioctl_result(ret, ())
234 }
235
236 /// Get a bitmask of supported virtio/vhost features.
get_vring_base(&self, queue_index: usize) -> Result<u32>237 fn get_vring_base(&self, queue_index: usize) -> Result<u32> {
238 let vring_state = vhost_vring_state {
239 index: queue_index as u32,
240 num: 0,
241 };
242 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
243 let ret = unsafe { ioctl_with_ref(self, VHOST_GET_VRING_BASE(), &vring_state) };
244 ioctl_result(ret, vring_state.num)
245 }
246
247 /// Set the eventfd to trigger when buffers have been used by the host.
248 ///
249 /// # Arguments
250 /// * `queue_index` - Index of the queue to modify.
251 /// * `fd` - EventFd to trigger.
set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<()>252 fn set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
253 let vring_file = vhost_vring_file {
254 index: queue_index as u32,
255 fd: fd.as_raw_fd(),
256 };
257
258 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
259 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_CALL(), &vring_file) };
260 ioctl_result(ret, ())
261 }
262
263 /// Set the eventfd that will be signaled by the guest when buffers are
264 /// available for the host to process.
265 ///
266 /// # Arguments
267 /// * `queue_index` - Index of the queue to modify.
268 /// * `fd` - EventFd that will be signaled from guest.
set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<()>269 fn set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
270 let vring_file = vhost_vring_file {
271 index: queue_index as u32,
272 fd: fd.as_raw_fd(),
273 };
274
275 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
276 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_KICK(), &vring_file) };
277 ioctl_result(ret, ())
278 }
279
280 /// Set the eventfd to signal an error from the vhost backend.
281 ///
282 /// # Arguments
283 /// * `queue_index` - Index of the queue to modify.
284 /// * `fd` - EventFd that will be signaled from the backend.
set_vring_err(&self, queue_index: usize, fd: &EventFd) -> Result<()>285 fn set_vring_err(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
286 let vring_file = vhost_vring_file {
287 index: queue_index as u32,
288 fd: fd.as_raw_fd(),
289 };
290
291 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
292 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ERR(), &vring_file) };
293 ioctl_result(ret, ())
294 }
295 }
296
/// Interface to handle in-kernel backend features.
pub trait VhostKernFeatures: Sized + AsRawFd {
    /// Get features acked with the vhost backend.
    fn get_backend_features_acked(&self) -> u64;

    /// Set features acked with the vhost backend.
    fn set_backend_features_acked(&mut self, features: u64);

    /// Get a bitmask of supported vhost backend features.
    ///
    /// Queries the kernel via the VHOST_GET_BACKEND_FEATURES ioctl.
    fn get_backend_features(&self) -> Result<u64> {
        let mut avail_features: u64 = 0;

        let ret =
            // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
            unsafe { ioctl_with_mut_ref(self, VHOST_GET_BACKEND_FEATURES(), &mut avail_features) };
        ioctl_result(ret, avail_features)
    }

    /// Inform the vhost subsystem which backend features to enable. This should
    /// be a subset of supported features from VHOST_GET_BACKEND_FEATURES.
    ///
    /// # Arguments
    /// * `features` - Bitmask of features to set.
    fn set_backend_features(&mut self, features: u64) -> Result<()> {
        // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
        let ret = unsafe { ioctl_with_ref(self, VHOST_SET_BACKEND_FEATURES(), &features) };

        // Cache the acked features only when the kernel accepted them, so that
        // get_backend_features_acked() reflects the device's actual state.
        if ret >= 0 {
            self.set_backend_features_acked(features);
        }

        ioctl_result(ret, ())
    }
}
331
/// Handle IOTLB messages for in-kernel vhost device backend.
impl<I: VhostKernBackend + VhostKernFeatures> VhostIotlbBackend for I {
    /// Send an IOTLB message to the in-kernel vhost backend.
    ///
    /// The message format is selected from the features previously acked with
    /// the backend: `vhost_msg_v2` when VHOST_BACKEND_F_IOTLB_MSG_V2 was acked,
    /// the legacy `vhost_msg` otherwise. Either way, the message is delivered
    /// by write(2)-ing the raw structure to the vhost fd.
    ///
    /// # Arguments
    /// * `msg` - IOTLB message to send.
    fn send_iotlb_msg(&self, msg: &VhostIotlbMsg) -> Result<()> {
        let ret: ssize_t;

        if self.get_backend_features_acked() & (1 << VHOST_BACKEND_F_IOTLB_MSG_V2) != 0 {
            // V2 message format: start zeroed, then fill in the iotlb union arm.
            let mut msg_v2 = vhost_msg_v2 {
                type_: VHOST_IOTLB_MSG_V2,
                ..Default::default()
            };

            msg_v2.__bindgen_anon_1.iotlb.iova = msg.iova;
            msg_v2.__bindgen_anon_1.iotlb.size = msg.size;
            msg_v2.__bindgen_anon_1.iotlb.uaddr = msg.userspace_addr;
            msg_v2.__bindgen_anon_1.iotlb.perm = msg.perm as u8;
            msg_v2.__bindgen_anon_1.iotlb.type_ = msg.msg_type as u8;

            // SAFETY: This is safe because we are using a valid vhost fd, and
            // a valid pointer and size to the vhost_msg_v2 structure.
            ret = unsafe {
                write(
                    self.as_raw_fd(),
                    &msg_v2 as *const vhost_msg_v2 as *const c_void,
                    mem::size_of::<vhost_msg_v2>(),
                )
            };
        } else {
            // Legacy (v1) message format for kernels without IOTLB_MSG_V2.
            let mut msg_v1 = vhost_msg {
                type_: VHOST_IOTLB_MSG,
                ..Default::default()
            };

            msg_v1.__bindgen_anon_1.iotlb.iova = msg.iova;
            msg_v1.__bindgen_anon_1.iotlb.size = msg.size;
            msg_v1.__bindgen_anon_1.iotlb.uaddr = msg.userspace_addr;
            msg_v1.__bindgen_anon_1.iotlb.perm = msg.perm as u8;
            msg_v1.__bindgen_anon_1.iotlb.type_ = msg.msg_type as u8;

            // SAFETY: This is safe because we are using a valid vhost fd, and
            // a valid pointer and size to the vhost_msg structure.
            ret = unsafe {
                write(
                    self.as_raw_fd(),
                    &msg_v1 as *const vhost_msg as *const c_void,
                    mem::size_of::<vhost_msg>(),
                )
            };
        }

        io_result(ret, ())
    }
}
388
impl VhostIotlbMsgParser for vhost_msg {
    /// Parse a raw `vhost_msg` received from the kernel into `msg`.
    ///
    /// Returns `Error::InvalidIotlbMsg` when the message type is not
    /// VHOST_IOTLB_MSG or when the embedded IOTLB entry type is unset (zero).
    fn parse(&self, msg: &mut VhostIotlbMsg) -> Result<()> {
        if self.type_ != VHOST_IOTLB_MSG {
            return Err(Error::InvalidIotlbMsg);
        }

        // SAFETY: We trust the kernel to return a structure with the union
        // fields properly initialized. We are sure it is a vhost_msg, because
        // we checked that `self.type_` is VHOST_IOTLB_MSG.
        unsafe {
            if self.__bindgen_anon_1.iotlb.type_ == 0 {
                return Err(Error::InvalidIotlbMsg);
            }

            msg.iova = self.__bindgen_anon_1.iotlb.iova;
            msg.size = self.__bindgen_anon_1.iotlb.size;
            msg.userspace_addr = self.__bindgen_anon_1.iotlb.uaddr;
            // NOTE(review): these transmutes assume the kernel only emits `perm`
            // and `type_` byte values that are valid VhostAccess/VhostIotlbType
            // discriminants — confirm against the vhost UAPI if enums change.
            msg.perm = mem::transmute::<u8, VhostAccess>(self.__bindgen_anon_1.iotlb.perm);
            msg.msg_type = mem::transmute::<u8, VhostIotlbType>(self.__bindgen_anon_1.iotlb.type_);
        }

        Ok(())
    }
}
413
impl VhostIotlbMsgParser for vhost_msg_v2 {
    /// Parse a raw `vhost_msg_v2` received from the kernel into `msg`.
    ///
    /// Returns `Error::InvalidIotlbMsg` when the message type is not
    /// VHOST_IOTLB_MSG_V2 or when the embedded IOTLB entry type is unset (zero).
    fn parse(&self, msg: &mut VhostIotlbMsg) -> Result<()> {
        if self.type_ != VHOST_IOTLB_MSG_V2 {
            return Err(Error::InvalidIotlbMsg);
        }

        // SAFETY: We trust the kernel to return a structure with the union
        // fields properly initialized. We are sure it is a vhost_msg_v2, because
        // we checked that `self.type_` is VHOST_IOTLB_MSG_V2.
        unsafe {
            if self.__bindgen_anon_1.iotlb.type_ == 0 {
                return Err(Error::InvalidIotlbMsg);
            }

            msg.iova = self.__bindgen_anon_1.iotlb.iova;
            msg.size = self.__bindgen_anon_1.iotlb.size;
            msg.userspace_addr = self.__bindgen_anon_1.iotlb.uaddr;
            // NOTE(review): these transmutes assume the kernel only emits `perm`
            // and `type_` byte values that are valid VhostAccess/VhostIotlbType
            // discriminants — confirm against the vhost UAPI if enums change.
            msg.perm = mem::transmute::<u8, VhostAccess>(self.__bindgen_anon_1.iotlb.perm);
            msg.msg_type = mem::transmute::<u8, VhostIotlbType>(self.__bindgen_anon_1.iotlb.type_);
        }

        Ok(())
    }
}
438
439 impl VringConfigData {
440 /// Convert the config (guest address space) into vhost_vring_addr
441 /// (host address space).
to_vhost_vring_addr<AS: GuestAddressSpace>( &self, queue_index: usize, mem: &AS, ) -> Result<vhost_vring_addr>442 pub fn to_vhost_vring_addr<AS: GuestAddressSpace>(
443 &self,
444 queue_index: usize,
445 mem: &AS,
446 ) -> Result<vhost_vring_addr> {
447 let desc_addr = mem
448 .memory()
449 .get_host_address(GuestAddress(self.desc_table_addr))
450 .map_err(|_| Error::DescriptorTableAddress)?;
451 let avail_addr = mem
452 .memory()
453 .get_host_address(GuestAddress(self.avail_ring_addr))
454 .map_err(|_| Error::AvailAddress)?;
455 let used_addr = mem
456 .memory()
457 .get_host_address(GuestAddress(self.used_ring_addr))
458 .map_err(|_| Error::UsedAddress)?;
459 Ok(vhost_vring_addr {
460 index: queue_index as u32,
461 flags: self.flags,
462 desc_user_addr: desc_addr as u64,
463 used_user_addr: used_addr as u64,
464 avail_user_addr: avail_addr as u64,
465 log_guest_addr: self.get_log_addr(),
466 })
467 }
468 }
469