• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Alibaba Cloud. All rights reserved.
2 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
3 
4 //! Virtio Vhost Backend Drivers
5 //!
6 //! Virtio devices use virtqueues to transport data efficiently. The first generation of virtqueue
7 //! is a set of three different single-producer, single-consumer ring structures designed to store
8 //! generic scatter-gather I/O. The virtio specification 1.1 introduces an alternative compact
9 //! virtqueue layout named "Packed Virtqueue", which is more friendly to memory cache system and
10 //! hardware implemented virtio devices. The packed virtqueue uses read-write memory, that means
11 //! the memory will be both read and written by both host and guest. The new Packed Virtqueue is
12 //! preferred for performance.
13 //!
//! Vhost is a mechanism to improve the performance of Virtio devices by delegating data plane
//! operations to dedicated IO service processes. Only the configuration, I/O submission
//! notification, and I/O completion interruption are piped through the hypervisor.
//! It uses the same virtqueue layout as Virtio to allow Vhost devices to be mapped directly to
//! Virtio devices. This allows a Vhost device to be accessed directly by a guest OS inside a
//! hypervisor process with an existing Virtio (PCI) driver.
20 //!
21 //! The initial vhost implementation is a part of the Linux kernel and uses ioctl interface to
22 //! communicate with userspace applications. Dedicated kernel worker threads are created to handle
23 //! IO requests from the guest.
24 //!
//! Later, the Vhost-user protocol was introduced to complement the ioctl interface used to
//! control the vhost implementation in the Linux kernel. It implements the control plane needed
//! to establish virtqueue sharing with a user space process on the same host. It uses
//! communication over a Unix domain socket to share file descriptors in the ancillary data of the
//! message.
//! The protocol defines two sides of the communication, master and slave. The master is the
//! application that shares its virtqueues. The slave is the consumer of the virtqueues. Master
//! and slave can each be either a client (i.e. connecting) or a server (listening) in the socket
//! communication.
32 
33 #![deny(missing_docs)]
34 
35 use std::fs::File;
36 use std::io::Error as IOError;
37 
38 use remain::sorted;
39 use thiserror::Error as ThisError;
40 
41 mod backend;
42 pub use backend::*;
43 
44 pub mod message;
45 
46 pub mod connection;
47 
48 mod sys;
49 pub use sys::SystemStream;
50 pub use sys::*;
51 
52 cfg_if::cfg_if! {
53     if #[cfg(feature = "vmm")] {
54         pub(crate) mod master;
55         pub use self::master::{Master, VhostUserMaster};
56         mod master_req_handler;
57         pub use self::master_req_handler::{VhostUserMasterReqHandler,
58                                     VhostUserMasterReqHandlerMut};
59     }
60 }
61 cfg_if::cfg_if! {
62     if #[cfg(feature = "device")] {
63         mod slave_req_handler;
64         mod slave_proxy;
65         pub use self::slave_req_handler::{
66             Protocol, SlaveReqHandler, SlaveReqHelper, VhostUserSlaveReqHandler,
67             VhostUserSlaveReqHandlerMut,
68         };
69         pub use self::slave_proxy::Slave;
70     }
71 }
72 cfg_if::cfg_if! {
73     if #[cfg(all(feature = "device", unix))] {
74         mod slave;
75         pub use self::slave::SlaveListener;
76     }
77 }
78 cfg_if::cfg_if! {
79     if #[cfg(feature = "vmm")] {
80         pub use self::master_req_handler::MasterReqHandler;
81     }
82 }
83 
84 /// Errors for vhost-user operations
85 #[sorted]
86 #[derive(Debug, ThisError)]
87 pub enum Error {
88     /// client exited properly.
89     #[error("client exited properly")]
90     ClientExit,
91     /// client disconnected.
92     /// If connection is closed properly, use `ClientExit` instead.
93     #[error("client closed the connection")]
94     Disconnect,
95     /// Virtio/protocol features mismatch.
96     #[error("virtio features mismatch")]
97     FeatureMismatch,
98     /// Fd array in question is too big or too small
99     #[error("wrong number of attached fds")]
100     IncorrectFds,
101     /// Invalid message format, flag or content.
102     #[error("invalid message")]
103     InvalidMessage,
104     /// Unsupported operations due to that the protocol feature hasn't been negotiated.
105     #[error("invalid operation")]
106     InvalidOperation,
107     /// Invalid parameters.
108     #[error("invalid parameters")]
109     InvalidParam,
110     /// Failure from the master side.
111     #[error("master Internal error")]
112     MasterInternalError,
113     /// Message is too large
114     #[error("oversized message")]
115     OversizedMsg,
116     /// Only part of a message have been sent or received successfully
117     #[error("partial message")]
118     PartialMessage,
119     /// Provided recv buffer was too small, and data was dropped.
120     #[error("buffer for recv was too small, data was dropped: got size {got}, needed {want}")]
121     RecvBufferTooSmall {
122         /// The size of the buffer received.
123         got: usize,
124         /// The expected size of the buffer.
125         want: usize,
126     },
127     /// Error from request handler
128     #[error("handler failed to handle request: {0}")]
129     ReqHandlerError(IOError),
130     /// Failure from the slave side.
131     #[error("slave internal error")]
132     SlaveInternalError,
133     /// The socket is broken or has been closed.
134     #[error("socket is broken: {0}")]
135     SocketBroken(std::io::Error),
136     /// Can't connect to peer.
137     #[error("can't connect to peer: {0}")]
138     SocketConnect(std::io::Error),
139     /// Generic socket errors.
140     #[error("socket error: {0}")]
141     SocketError(std::io::Error),
142     /// Should retry the socket operation again.
143     #[error("temporary socket error: {0}")]
144     SocketRetry(std::io::Error),
145     /// Error from tx/rx on a Tube.
146     #[error("failed to read/write on Tube: {0}")]
147     TubeError(base::TubeError),
148     /// Error from VFIO device.
149     #[error("error occurred in VFIO device: {0}")]
150     VfioDeviceError(anyhow::Error),
151 }
152 
153 impl From<base::TubeError> for Error {
from(err: base::TubeError) -> Self154     fn from(err: base::TubeError) -> Self {
155         Error::TubeError(err)
156     }
157 }
158 
159 impl From<std::io::Error> for Error {
from(err: std::io::Error) -> Self160     fn from(err: std::io::Error) -> Self {
161         Error::SocketError(err)
162     }
163 }
164 
165 impl From<base::Error> for Error {
166     /// Convert raw socket errors into meaningful vhost-user errors.
167     ///
168     /// The base::Error is a simple wrapper over the raw errno, which doesn't means
169     /// much to the vhost-user connection manager. So convert it into meaningful errors to simplify
170     /// the connection manager logic.
171     ///
172     /// # Return:
173     /// * - Error::SocketRetry: temporary error caused by signals or short of resources.
174     /// * - Error::SocketBroken: the underline socket is broken.
175     /// * - Error::SocketError: other socket related errors.
176     #[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux
from(err: base::Error) -> Self177     fn from(err: base::Error) -> Self {
178         match err.errno() {
179             // Retry:
180             // * EAGAIN, EWOULDBLOCK: The socket is marked nonblocking and the requested operation
181             //   would block.
182             // * EINTR: A signal occurred before any data was transmitted
183             // * ENOBUFS: The  output  queue  for  a network interface was full.  This generally
184             //   indicates that the interface has stopped sending, but may be caused by transient
185             //   congestion.
186             // * ENOMEM: No memory available.
187             libc::EAGAIN | libc::EWOULDBLOCK | libc::EINTR | libc::ENOBUFS | libc::ENOMEM => {
188                 Error::SocketRetry(err.into())
189             }
190             // Broken:
191             // * ECONNRESET: Connection reset by peer.
192             // * EPIPE: The local end has been shut down on a connection oriented socket. In this
193             //   case the process will also receive a SIGPIPE unless MSG_NOSIGNAL is set.
194             libc::ECONNRESET | libc::EPIPE => Error::SocketBroken(err.into()),
195             // Write permission is denied on the destination socket file, or search permission is
196             // denied for one of the directories the path prefix.
197             libc::EACCES => Error::SocketConnect(IOError::from_raw_os_error(libc::EACCES)),
198             // Catch all other errors
199             e => Error::SocketError(IOError::from_raw_os_error(e)),
200         }
201     }
202 }
203 
204 /// Result of vhost-user operations
205 pub type Result<T> = std::result::Result<T, Error>;
206 
/// Result type produced by request handlers: a standard `Result` whose error type is
/// `std::io::Error`.
pub type HandlerResult<T> = ::std::result::Result<T, IOError>;
209 
/// Extracts the only file from an optional list of files.
///
/// Returns `Some(file)` when `files` is `Some` and holds exactly one file. Returns `None` when
/// `files` is `None`, empty, or holds more than one file; any files passed in are dropped in that
/// case.
pub(crate) fn take_single_file(files: Option<Vec<File>>) -> Option<File> {
    files
        .filter(|list| list.len() == 1)
        // The list holds exactly one element here, so `pop` yields that element.
        .and_then(|mut list| list.pop())
}
219 
220 #[cfg(all(test, feature = "device"))]
221 mod dummy_slave;
222 
223 #[cfg(all(test, feature = "vmm", feature = "device"))]
224 mod tests {
225     use std::sync::Arc;
226     use std::sync::Barrier;
227     use std::sync::Mutex;
228     use std::thread;
229 
230     use base::AsRawDescriptor;
231     use tempfile::tempfile;
232 
233     use super::*;
234     use crate::backend::VhostBackend;
235     use crate::connection::tests::*;
236     use crate::dummy_slave::DummySlaveReqHandler;
237     use crate::dummy_slave::VIRTIO_FEATURES;
238     use crate::message::*;
239     use crate::VhostUserMemoryRegionInfo;
240     use crate::VringConfigData;
241 
242     /// Utility function to process a header and a message together.
handle_request( h: &mut SlaveReqHandler<Mutex<DummySlaveReqHandler>, MasterReqEndpoint>, ) -> Result<()>243     fn handle_request(
244         h: &mut SlaveReqHandler<Mutex<DummySlaveReqHandler>, MasterReqEndpoint>,
245     ) -> Result<()> {
246         // We assume that a header comes together with message body in tests so we don't wait before
247         // calling `process_message()`.
248         let (hdr, files) = h.recv_header()?;
249         h.process_message(hdr, files)
250     }
251 
252     #[test]
create_dummy_slave()253     fn create_dummy_slave() {
254         let slave = Mutex::new(DummySlaveReqHandler::new());
255 
256         slave.set_owner().unwrap();
257         assert!(slave.set_owner().is_err());
258     }
259 
260     #[test]
test_set_owner()261     fn test_set_owner() {
262         let slave_be = Mutex::new(DummySlaveReqHandler::new());
263         let (master, mut slave) = create_master_slave_pair(slave_be);
264 
265         assert!(!slave.as_ref().lock().unwrap().owned);
266         master.set_owner().unwrap();
267         handle_request(&mut slave).unwrap();
268         assert!(slave.as_ref().lock().unwrap().owned);
269         master.set_owner().unwrap();
270         assert!(handle_request(&mut slave).is_err());
271         assert!(slave.as_ref().lock().unwrap().owned);
272     }
273 
274     #[test]
test_set_features()275     fn test_set_features() {
276         let mbar = Arc::new(Barrier::new(2));
277         let sbar = mbar.clone();
278         let slave_be = Mutex::new(DummySlaveReqHandler::new());
279         let (mut master, mut slave) = create_master_slave_pair(slave_be);
280 
281         thread::spawn(move || {
282             handle_request(&mut slave).unwrap();
283             assert!(slave.as_ref().lock().unwrap().owned);
284 
285             handle_request(&mut slave).unwrap();
286             handle_request(&mut slave).unwrap();
287             assert_eq!(
288                 slave.as_ref().lock().unwrap().acked_features,
289                 VIRTIO_FEATURES & !0x1
290             );
291 
292             handle_request(&mut slave).unwrap();
293             handle_request(&mut slave).unwrap();
294             assert_eq!(
295                 slave.as_ref().lock().unwrap().acked_protocol_features,
296                 VhostUserProtocolFeatures::all().bits()
297             );
298 
299             sbar.wait();
300         });
301 
302         master.set_owner().unwrap();
303 
304         // set virtio features
305         let features = master.get_features().unwrap();
306         assert_eq!(features, VIRTIO_FEATURES);
307         master.set_features(VIRTIO_FEATURES & !0x1).unwrap();
308 
309         // set vhost protocol features
310         let features = master.get_protocol_features().unwrap();
311         assert_eq!(features.bits(), VhostUserProtocolFeatures::all().bits());
312         master.set_protocol_features(features).unwrap();
313 
314         mbar.wait();
315     }
316 
317     #[test]
test_master_slave_process()318     fn test_master_slave_process() {
319         let mbar = Arc::new(Barrier::new(2));
320         let sbar = mbar.clone();
321         let slave_be = Mutex::new(DummySlaveReqHandler::new());
322         let (mut master, mut slave) = create_master_slave_pair(slave_be);
323 
324         thread::spawn(move || {
325             // set_own()
326             handle_request(&mut slave).unwrap();
327             assert!(slave.as_ref().lock().unwrap().owned);
328 
329             // get/set_features()
330             handle_request(&mut slave).unwrap();
331             handle_request(&mut slave).unwrap();
332             assert_eq!(
333                 slave.as_ref().lock().unwrap().acked_features,
334                 VIRTIO_FEATURES & !0x1
335             );
336 
337             handle_request(&mut slave).unwrap();
338             handle_request(&mut slave).unwrap();
339             assert_eq!(
340                 slave.as_ref().lock().unwrap().acked_protocol_features,
341                 VhostUserProtocolFeatures::all().bits()
342             );
343 
344             // get_inflight_fd()
345             handle_request(&mut slave).unwrap();
346             // set_inflight_fd()
347             handle_request(&mut slave).unwrap();
348 
349             // get_queue_num()
350             handle_request(&mut slave).unwrap();
351 
352             // set_mem_table()
353             handle_request(&mut slave).unwrap();
354 
355             // get/set_config()
356             handle_request(&mut slave).unwrap();
357             handle_request(&mut slave).unwrap();
358 
359             // set_slave_request_fd
360             handle_request(&mut slave).unwrap();
361 
362             // set_vring_enable
363             handle_request(&mut slave).unwrap();
364 
365             // set_log_base,set_log_fd()
366             handle_request(&mut slave).unwrap_err();
367             handle_request(&mut slave).unwrap_err();
368 
369             // set_vring_xxx
370             handle_request(&mut slave).unwrap();
371             handle_request(&mut slave).unwrap();
372             handle_request(&mut slave).unwrap();
373             handle_request(&mut slave).unwrap();
374             handle_request(&mut slave).unwrap();
375             handle_request(&mut slave).unwrap();
376 
377             // get_max_mem_slots()
378             handle_request(&mut slave).unwrap();
379 
380             // add_mem_region()
381             handle_request(&mut slave).unwrap();
382 
383             // remove_mem_region()
384             handle_request(&mut slave).unwrap();
385 
386             sbar.wait();
387         });
388 
389         master.set_owner().unwrap();
390 
391         // set virtio features
392         let features = master.get_features().unwrap();
393         assert_eq!(features, VIRTIO_FEATURES);
394         master.set_features(VIRTIO_FEATURES & !0x1).unwrap();
395 
396         // set vhost protocol features
397         let features = master.get_protocol_features().unwrap();
398         assert_eq!(features.bits(), VhostUserProtocolFeatures::all().bits());
399         master.set_protocol_features(features).unwrap();
400 
401         // Retrieve inflight I/O tracking information
402         let (inflight_info, inflight_file) = master
403             .get_inflight_fd(&VhostUserInflight {
404                 num_queues: 2,
405                 queue_size: 256,
406                 ..Default::default()
407             })
408             .unwrap();
409         // Set the buffer back to the backend
410         master
411             .set_inflight_fd(&inflight_info, inflight_file.as_raw_descriptor())
412             .unwrap();
413 
414         let num = master.get_queue_num().unwrap();
415         assert_eq!(num, 2);
416 
417         let event = base::Event::new().unwrap();
418         let mem = [VhostUserMemoryRegionInfo {
419             guest_phys_addr: 0,
420             memory_size: 0x10_0000,
421             userspace_addr: 0,
422             mmap_offset: 0,
423             mmap_handle: event.as_raw_descriptor(),
424         }];
425         master.set_mem_table(&mem).unwrap();
426 
427         master
428             .set_config(0x100, VhostUserConfigFlags::WRITABLE, &[0xa5u8])
429             .unwrap();
430         let buf = [0x0u8; 4];
431         let (reply_body, reply_payload) = master
432             .get_config(0x100, 4, VhostUserConfigFlags::empty(), &buf)
433             .unwrap();
434         let offset = reply_body.offset;
435         assert_eq!(offset, 0x100);
436         assert_eq!(reply_payload[0], 0xa5);
437 
438         #[cfg(windows)]
439         let tubes = base::Tube::pair().unwrap();
440         #[cfg(windows)]
441         // Safe because we will be importing the Tube in the other thread.
442         let descriptor =
443             unsafe { tube_transporter::packed_tube::pack(tubes.0, std::process::id()).unwrap() };
444 
445         #[cfg(unix)]
446         let descriptor = base::Event::new().unwrap();
447 
448         master.set_slave_request_fd(&descriptor).unwrap();
449         master.set_vring_enable(0, true).unwrap();
450 
451         // unimplemented yet
452         master
453             .set_log_base(0, Some(event.as_raw_descriptor()))
454             .unwrap();
455         master.set_log_fd(event.as_raw_descriptor()).unwrap();
456 
457         master.set_vring_num(0, 256).unwrap();
458         master.set_vring_base(0, 0).unwrap();
459         let config = VringConfigData {
460             queue_max_size: 256,
461             queue_size: 128,
462             flags: VhostUserVringAddrFlags::VHOST_VRING_F_LOG.bits(),
463             desc_table_addr: 0x1000,
464             used_ring_addr: 0x2000,
465             avail_ring_addr: 0x3000,
466             log_addr: Some(0x4000),
467         };
468         master.set_vring_addr(0, &config).unwrap();
469         master.set_vring_call(0, &event).unwrap();
470         master.set_vring_kick(0, &event).unwrap();
471         master.set_vring_err(0, &event).unwrap();
472 
473         let max_mem_slots = master.get_max_mem_slots().unwrap();
474         assert_eq!(max_mem_slots, 32);
475 
476         let region_file = tempfile().unwrap();
477         let region = VhostUserMemoryRegionInfo {
478             guest_phys_addr: 0x10_0000,
479             memory_size: 0x10_0000,
480             userspace_addr: 0,
481             mmap_offset: 0,
482             mmap_handle: region_file.as_raw_descriptor(),
483         };
484         master.add_mem_region(&region).unwrap();
485 
486         master.remove_mem_region(&region).unwrap();
487 
488         mbar.wait();
489     }
490 
491     #[test]
test_error_display()492     fn test_error_display() {
493         assert_eq!(format!("{}", Error::InvalidParam), "invalid parameters");
494         assert_eq!(format!("{}", Error::InvalidOperation), "invalid operation");
495     }
496 
497     #[test]
test_error_from_base_error()498     fn test_error_from_base_error() {
499         let e: Error = base::Error::new(libc::EAGAIN).into();
500         if let Error::SocketRetry(e1) = e {
501             assert_eq!(e1.raw_os_error().unwrap(), libc::EAGAIN);
502         } else {
503             panic!("invalid error code conversion!");
504         }
505     }
506 }
507