• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Alibaba Cloud. All rights reserved.
2 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
3 
4 //! Virtio Vhost Backend Drivers
5 //!
6 //! Virtio devices use virtqueues to transport data efficiently. The first generation of virtqueue
7 //! is a set of three different single-producer, single-consumer ring structures designed to store
8 //! generic scatter-gather I/O. The virtio specification 1.1 introduces an alternative compact
9 //! virtqueue layout named "Packed Virtqueue", which is more friendly to memory cache system and
//! hardware implemented virtio devices. The packed virtqueue uses read-write memory, which means
//! the memory will be both read and written by both host and guest. The new Packed Virtqueue is
//! preferred for performance.
13 //!
//! Vhost is a mechanism to improve performance of Virtio devices by delegating data plane
//! operations to dedicated IO service processes. Only the configuration, I/O submission
//! notification, and I/O completion interruption are piped through the hypervisor.
17 //! It uses the same virtqueue layout as Virtio to allow Vhost devices to be mapped directly to
18 //! Virtio devices. This allows a Vhost device to be accessed directly by a guest OS inside a
19 //! hypervisor process with an existing Virtio (PCI) driver.
20 //!
21 //! The initial vhost implementation is a part of the Linux kernel and uses ioctl interface to
22 //! communicate with userspace applications. Dedicated kernel worker threads are created to handle
23 //! IO requests from the guest.
24 //!
//! Later, the Vhost-user protocol was introduced to complement the ioctl interface used to control
//! the vhost implementation in the Linux kernel. It implements the control plane needed to
//! establish virtqueue sharing with a user space process on the same host. It communicates over a
//! Unix domain socket and shares file descriptors in the ancillary data of the message.
29 //! The protocol defines 2 sides of the communication, master and slave. Master is the application
30 //! that shares its virtqueues. Slave is the consumer of the virtqueues. Master and slave can be
31 //! either a client (i.e. connecting) or server (listening) in the socket communication.
32 
33 #![deny(missing_docs)]
34 
35 #[cfg(any(feature = "vmm", feature = "device"))]
36 use std::fs::File;
37 use std::io::Error as IOError;
38 
39 use remain::sorted;
40 use thiserror::Error as ThisError;
41 
42 mod backend;
43 pub use backend::*;
44 
45 pub mod message;
46 
47 pub mod connection;
48 
49 mod sys;
50 pub use sys::{SystemStream, *};
51 
52 cfg_if::cfg_if! {
53     if #[cfg(feature = "vmm")] {
54         pub(crate) mod master;
55         pub use self::master::{Master, VhostUserMaster};
56         mod master_req_handler;
57         pub use self::master_req_handler::{VhostUserMasterReqHandler,
58                                     VhostUserMasterReqHandlerMut};
59     }
60 }
61 cfg_if::cfg_if! {
62     if #[cfg(feature = "device")] {
63         mod slave_req_handler;
64         mod slave_fs_cache;
65         pub use self::slave_req_handler::{
66             Protocol, SlaveReqHandler, SlaveReqHelper, VhostUserSlaveReqHandler,
67             VhostUserSlaveReqHandlerMut,
68         };
69         pub use self::slave_fs_cache::SlaveFsCacheReq;
70     }
71 }
72 cfg_if::cfg_if! {
73     if #[cfg(all(feature = "device", unix))] {
74         mod slave;
75         pub use self::slave::SlaveListener;
76     }
77 }
78 cfg_if::cfg_if! {
79     if #[cfg(all(feature = "vmm", unix))] {
80         pub use self::master_req_handler::MasterReqHandler;
81     }
82 }
83 
84 /// Errors for vhost-user operations
85 #[sorted]
86 #[derive(Debug, ThisError)]
87 pub enum Error {
88     /// client exited properly.
89     #[error("client exited properly")]
90     ClientExit,
91     /// client disconnected.
92     /// If connection is closed properly, use `ClientExit` instead.
93     #[error("client closed the connection")]
94     Disconnect,
95     /// Virtio/protocol features mismatch.
96     #[error("virtio features mismatch")]
97     FeatureMismatch,
98     /// Fd array in question is too big or too small
99     #[error("wrong number of attached fds")]
100     IncorrectFds,
101     /// Invalid message format, flag or content.
102     #[error("invalid message")]
103     InvalidMessage,
104     /// Unsupported operations due to that the protocol feature hasn't been negotiated.
105     #[error("invalid operation")]
106     InvalidOperation,
107     /// Invalid parameters.
108     #[error("invalid parameters")]
109     InvalidParam,
110     /// Failure from the master side.
111     #[error("master Internal error")]
112     MasterInternalError,
113     /// Message is too large
114     #[error("oversized message")]
115     OversizedMsg,
116     /// Only part of a message have been sent or received successfully
117     #[error("partial message")]
118     PartialMessage,
119     /// Provided recv buffer was too small, and data was dropped.
120     #[error("buffer for recv was too small, data was dropped: got size {got}, needed {want}")]
121     RecvBufferTooSmall {
122         /// The size of the buffer received.
123         got: usize,
124         /// The expected size of the buffer.
125         want: usize,
126     },
127     /// Error from request handler
128     #[error("handler failed to handle request: {0}")]
129     ReqHandlerError(IOError),
130     /// Failure from the slave side.
131     #[error("slave internal error")]
132     SlaveInternalError,
133     /// The socket is broken or has been closed.
134     #[error("socket is broken: {0}")]
135     SocketBroken(std::io::Error),
136     /// Can't connect to peer.
137     #[error("can't connect to peer: {0}")]
138     SocketConnect(std::io::Error),
139     /// Generic socket errors.
140     #[error("socket error: {0}")]
141     SocketError(std::io::Error),
142     /// Should retry the socket operation again.
143     #[error("temporary socket error: {0}")]
144     SocketRetry(std::io::Error),
145     /// Error from tx/rx on a Tube.
146     #[error("failed to read/write on Tube: {0}")]
147     TubeError(base::TubeError),
148     /// Error from VFIO device.
149     #[error("error occurred in VFIO device: {0}")]
150     VfioDeviceError(anyhow::Error),
151 }
152 
153 impl std::convert::From<base::Error> for Error {
154     /// Convert raw socket errors into meaningful vhost-user errors.
155     ///
156     /// The base::Error is a simple wrapper over the raw errno, which doesn't means
157     /// much to the vhost-user connection manager. So convert it into meaningful errors to simplify
158     /// the connection manager logic.
159     ///
160     /// # Return:
161     /// * - Error::SocketRetry: temporary error caused by signals or short of resources.
162     /// * - Error::SocketBroken: the underline socket is broken.
163     /// * - Error::SocketError: other socket related errors.
164     #[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux
from(err: base::Error) -> Self165     fn from(err: base::Error) -> Self {
166         match err.errno() {
167             // Retry:
168             // * EAGAIN, EWOULDBLOCK: The socket is marked nonblocking and the requested operation
169             //   would block.
170             // * EINTR: A signal occurred before any data was transmitted
171             // * ENOBUFS: The  output  queue  for  a network interface was full.  This generally
172             //   indicates that the interface has stopped sending, but may be caused by transient
173             //   congestion.
174             // * ENOMEM: No memory available.
175             libc::EAGAIN | libc::EWOULDBLOCK | libc::EINTR | libc::ENOBUFS | libc::ENOMEM => {
176                 Error::SocketRetry(err.into())
177             }
178             // Broken:
179             // * ECONNRESET: Connection reset by peer.
180             // * EPIPE: The local end has been shut down on a connection oriented socket. In this
181             //   case the process will also receive a SIGPIPE unless MSG_NOSIGNAL is set.
182             libc::ECONNRESET | libc::EPIPE => Error::SocketBroken(err.into()),
183             // Write permission is denied on the destination socket file, or search permission is
184             // denied for one of the directories the path prefix.
185             libc::EACCES => Error::SocketConnect(IOError::from_raw_os_error(libc::EACCES)),
186             // Catch all other errors
187             e => Error::SocketError(IOError::from_raw_os_error(e)),
188         }
189     }
190 }
191 
/// Result of vhost-user operations.
///
/// Alias for `std::result::Result` whose error type is fixed to this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
194 
/// Result of request handler.
///
/// Alias for `std::result::Result` whose error type is `std::io::Error` (imported in this file
/// as `IOError`).
pub type HandlerResult<T> = std::result::Result<T, IOError>;
197 
198 /// Utility function to take the first element from option of a vector of files.
199 /// Returns `None` if the vector contains no file or more than one file.
200 #[cfg(any(feature = "vmm", feature = "device"))]
pub(crate) fn take_single_file(files: Option<Vec<File>>) -> Option<File> {
    // Only a vector holding exactly one file qualifies. `None`, an empty vector and a vector
    // with several files all produce `None`.
    files
        .filter(|list| list.len() == 1)
        .and_then(|mut list| list.pop())
}
208 
209 #[cfg(all(test, feature = "device"))]
210 mod dummy_slave;
211 
212 #[cfg(all(test, feature = "vmm", feature = "device"))]
213 mod tests {
214     use base::AsRawDescriptor;
215     use std::sync::{Arc, Barrier, Mutex};
216     use std::thread;
217 
218     use super::connection::tests::*;
219     use super::dummy_slave::{DummySlaveReqHandler, VIRTIO_FEATURES};
220     use super::message::*;
221     use super::*;
222     use crate::backend::VhostBackend;
223     use crate::{VhostUserMemoryRegionInfo, VringConfigData};
224     use tempfile::tempfile;
225 
226     #[test]
create_dummy_slave()227     fn create_dummy_slave() {
228         let slave = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
229 
230         slave.set_owner().unwrap();
231         assert!(slave.set_owner().is_err());
232     }
233 
234     #[test]
test_set_owner()235     fn test_set_owner() {
236         let slave_be = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
237         let (master, mut slave) = create_master_slave_pair(slave_be.clone());
238 
239         assert!(!slave_be.lock().unwrap().owned);
240         master.set_owner().unwrap();
241         slave.handle_request().unwrap();
242         assert!(slave_be.lock().unwrap().owned);
243         master.set_owner().unwrap();
244         assert!(slave.handle_request().is_err());
245         assert!(slave_be.lock().unwrap().owned);
246     }
247 
248     #[test]
test_set_features()249     fn test_set_features() {
250         let mbar = Arc::new(Barrier::new(2));
251         let sbar = mbar.clone();
252         let slave_be = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
253         let (mut master, mut slave) = create_master_slave_pair(slave_be.clone());
254 
255         thread::spawn(move || {
256             slave.handle_request().unwrap();
257             assert!(slave_be.lock().unwrap().owned);
258 
259             slave.handle_request().unwrap();
260             slave.handle_request().unwrap();
261             assert_eq!(
262                 slave_be.lock().unwrap().acked_features,
263                 VIRTIO_FEATURES & !0x1
264             );
265 
266             slave.handle_request().unwrap();
267             slave.handle_request().unwrap();
268             assert_eq!(
269                 slave_be.lock().unwrap().acked_protocol_features,
270                 VhostUserProtocolFeatures::all().bits()
271             );
272 
273             sbar.wait();
274         });
275 
276         master.set_owner().unwrap();
277 
278         // set virtio features
279         let features = master.get_features().unwrap();
280         assert_eq!(features, VIRTIO_FEATURES);
281         master.set_features(VIRTIO_FEATURES & !0x1).unwrap();
282 
283         // set vhost protocol features
284         let features = master.get_protocol_features().unwrap();
285         assert_eq!(features.bits(), VhostUserProtocolFeatures::all().bits());
286         master.set_protocol_features(features).unwrap();
287 
288         mbar.wait();
289     }
290 
291     #[test]
test_master_slave_process()292     fn test_master_slave_process() {
293         let mbar = Arc::new(Barrier::new(2));
294         let sbar = mbar.clone();
295         let slave_be = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
296         let (mut master, mut slave) = create_master_slave_pair(slave_be.clone());
297 
298         thread::spawn(move || {
299             // set_own()
300             slave.handle_request().unwrap();
301             assert!(slave_be.lock().unwrap().owned);
302 
303             // get/set_features()
304             slave.handle_request().unwrap();
305             slave.handle_request().unwrap();
306             assert_eq!(
307                 slave_be.lock().unwrap().acked_features,
308                 VIRTIO_FEATURES & !0x1
309             );
310 
311             slave.handle_request().unwrap();
312             slave.handle_request().unwrap();
313             assert_eq!(
314                 slave_be.lock().unwrap().acked_protocol_features,
315                 VhostUserProtocolFeatures::all().bits()
316             );
317 
318             // get_inflight_fd()
319             slave.handle_request().unwrap();
320             // set_inflight_fd()
321             slave.handle_request().unwrap();
322 
323             // get_queue_num()
324             slave.handle_request().unwrap();
325 
326             // set_mem_table()
327             slave.handle_request().unwrap();
328 
329             // get/set_config()
330             slave.handle_request().unwrap();
331             slave.handle_request().unwrap();
332 
333             // set_slave_request_rd isn't implemented on Windows.
334             #[cfg(unix)]
335             {
336                 // set_slave_request_fd
337                 slave.handle_request().unwrap();
338             }
339 
340             // set_vring_enable
341             slave.handle_request().unwrap();
342 
343             // set_log_base,set_log_fd()
344             slave.handle_request().unwrap_err();
345             slave.handle_request().unwrap_err();
346 
347             // set_vring_xxx
348             slave.handle_request().unwrap();
349             slave.handle_request().unwrap();
350             slave.handle_request().unwrap();
351             slave.handle_request().unwrap();
352             slave.handle_request().unwrap();
353             slave.handle_request().unwrap();
354 
355             // get_max_mem_slots()
356             slave.handle_request().unwrap();
357 
358             // add_mem_region()
359             slave.handle_request().unwrap();
360 
361             // remove_mem_region()
362             slave.handle_request().unwrap();
363 
364             sbar.wait();
365         });
366 
367         master.set_owner().unwrap();
368 
369         // set virtio features
370         let features = master.get_features().unwrap();
371         assert_eq!(features, VIRTIO_FEATURES);
372         master.set_features(VIRTIO_FEATURES & !0x1).unwrap();
373 
374         // set vhost protocol features
375         let features = master.get_protocol_features().unwrap();
376         assert_eq!(features.bits(), VhostUserProtocolFeatures::all().bits());
377         master.set_protocol_features(features).unwrap();
378 
379         // Retrieve inflight I/O tracking information
380         let (inflight_info, inflight_file) = master
381             .get_inflight_fd(&VhostUserInflight {
382                 num_queues: 2,
383                 queue_size: 256,
384                 ..Default::default()
385             })
386             .unwrap();
387         // Set the buffer back to the backend
388         master
389             .set_inflight_fd(&inflight_info, inflight_file.as_raw_descriptor())
390             .unwrap();
391 
392         let num = master.get_queue_num().unwrap();
393         assert_eq!(num, 2);
394 
395         let event = base::Event::new().unwrap();
396         let mem = [VhostUserMemoryRegionInfo {
397             guest_phys_addr: 0,
398             memory_size: 0x10_0000,
399             userspace_addr: 0,
400             mmap_offset: 0,
401             mmap_handle: event.as_raw_descriptor(),
402         }];
403         master.set_mem_table(&mem).unwrap();
404 
405         master
406             .set_config(0x100, VhostUserConfigFlags::WRITABLE, &[0xa5u8])
407             .unwrap();
408         let buf = [0x0u8; 4];
409         let (reply_body, reply_payload) = master
410             .get_config(0x100, 4, VhostUserConfigFlags::empty(), &buf)
411             .unwrap();
412         let offset = reply_body.offset;
413         assert_eq!(offset, 0x100);
414         assert_eq!(reply_payload[0], 0xa5);
415 
416         // slave request rds are not implemented on Windows.
417         #[cfg(unix)]
418         {
419             master
420                 .set_slave_request_fd(&event as &dyn AsRawDescriptor)
421                 .unwrap();
422         }
423         master.set_vring_enable(0, true).unwrap();
424 
425         // unimplemented yet
426         master
427             .set_log_base(0, Some(event.as_raw_descriptor()))
428             .unwrap();
429         master.set_log_fd(event.as_raw_descriptor()).unwrap();
430 
431         master.set_vring_num(0, 256).unwrap();
432         master.set_vring_base(0, 0).unwrap();
433         let config = VringConfigData {
434             queue_max_size: 256,
435             queue_size: 128,
436             flags: VhostUserVringAddrFlags::VHOST_VRING_F_LOG.bits(),
437             desc_table_addr: 0x1000,
438             used_ring_addr: 0x2000,
439             avail_ring_addr: 0x3000,
440             log_addr: Some(0x4000),
441         };
442         master.set_vring_addr(0, &config).unwrap();
443         master.set_vring_call(0, &event).unwrap();
444         master.set_vring_kick(0, &event).unwrap();
445         master.set_vring_err(0, &event).unwrap();
446 
447         let max_mem_slots = master.get_max_mem_slots().unwrap();
448         assert_eq!(max_mem_slots, 32);
449 
450         let region_file = tempfile().unwrap();
451         let region = VhostUserMemoryRegionInfo {
452             guest_phys_addr: 0x10_0000,
453             memory_size: 0x10_0000,
454             userspace_addr: 0,
455             mmap_offset: 0,
456             mmap_handle: region_file.as_raw_descriptor(),
457         };
458         master.add_mem_region(&region).unwrap();
459 
460         master.remove_mem_region(&region).unwrap();
461 
462         mbar.wait();
463     }
464 
465     #[test]
test_error_display()466     fn test_error_display() {
467         assert_eq!(format!("{}", Error::InvalidParam), "invalid parameters");
468         assert_eq!(format!("{}", Error::InvalidOperation), "invalid operation");
469     }
470 
471     #[test]
test_error_from_base_error()472     fn test_error_from_base_error() {
473         let e: Error = base::Error::new(libc::EAGAIN).into();
474         if let Error::SocketRetry(e1) = e {
475             assert_eq!(e1.raw_os_error().unwrap(), libc::EAGAIN);
476         } else {
477             panic!("invalid error code conversion!");
478         }
479     }
480 }
481