• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
2 
3 use std::{
4     collections::{HashMap, HashSet},
5     io::{self, Result as IoResult},
6     sync::{Arc, Mutex, RwLock},
7     u16, u32, u64, u8,
8 };
9 
10 use log::warn;
11 use thiserror::Error as ThisError;
12 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
13 use vhost_user_backend::{VhostUserBackend, VringRwLock};
14 use virtio_bindings::bindings::{
15     virtio_config::VIRTIO_F_NOTIFY_ON_EMPTY, virtio_config::VIRTIO_F_VERSION_1,
16     virtio_ring::VIRTIO_RING_F_EVENT_IDX,
17 };
18 use vm_memory::{ByteValued, GuestMemoryAtomic, GuestMemoryMmap, Le64};
19 use vmm_sys_util::{
20     epoll::EventSet,
21     eventfd::{EventFd, EFD_NONBLOCK},
22 };
23 
24 use crate::thread_backend::RawPktsQ;
25 use crate::vhu_vsock_thread::*;
26 
/// Map keyed by CID (`u64`) whose values bundle three pieces of shared state:
/// a lock-protected raw packets queue, a lock-protected set of strings and an
/// `EventFd`.
// NOTE(review): the `HashSet<String>` presumably holds the device's group
// names and the `EventFd` presumably notifies the device about sibling VM
// traffic -- confirm against `vhu_vsock_thread`.
pub(crate) type CidMap =
    HashMap<u64, (Arc<RwLock<RawPktsQ>>, Arc<RwLock<HashSet<String>>>, EventFd)>;
29 
// Number of virtqueues reported by the backend (rx, tx and event).
const NUM_QUEUES: usize = 3;
// Maximum queue size reported by the backend.
const QUEUE_SIZE: usize = 256;

// New descriptors are pending on the rx queue.
const RX_QUEUE_EVENT: u16 = 0;
// New descriptors are pending on the tx queue.
const TX_QUEUE_EVENT: u16 = 1;
// New descriptors are pending on the event queue.
const EVT_QUEUE_EVENT: u16 = 2;

/// Notification coming from the backend.
/// Event range [0..=NUM_QUEUES] is reserved for the queues and the exit event,
/// so NUM_QUEUES + 1 is used.
pub(crate) const BACKEND_EVENT: u16 = (NUM_QUEUES + 1) as u16;

/// Notification coming from the sibling VM.
pub(crate) const SIBLING_VM_EVENT: u16 = BACKEND_EVENT + 1;

/// CID of the host
pub(crate) const VSOCK_HOST_CID: u64 = 2;

/// Connection oriented packet
pub(crate) const VSOCK_TYPE_STREAM: u16 = 1;

// Vsock packet operation IDs.
// (Plain comment on purpose: a `///` here would be merged into the docs of
// the next constant.)

/// Connection request
pub(crate) const VSOCK_OP_REQUEST: u16 = 1;
/// Connection response
pub(crate) const VSOCK_OP_RESPONSE: u16 = 2;
/// Connection reset
pub(crate) const VSOCK_OP_RST: u16 = 3;
/// Shutdown connection
pub(crate) const VSOCK_OP_SHUTDOWN: u16 = 4;
/// Data read/write
pub(crate) const VSOCK_OP_RW: u16 = 5;
/// Flow control credit update
pub(crate) const VSOCK_OP_CREDIT_UPDATE: u16 = 6;
/// Flow control credit request
pub(crate) const VSOCK_OP_CREDIT_REQUEST: u16 = 7;

// Vsock packet flags.
// (Plain comment on purpose: a `///` here would be merged into the docs of
// the next constant.)

/// VSOCK_OP_SHUTDOWN: Packet sender will receive no more data
pub(crate) const VSOCK_FLAGS_SHUTDOWN_RCV: u32 = 1;
/// VSOCK_OP_SHUTDOWN: Packet sender will send no more data
pub(crate) const VSOCK_FLAGS_SHUTDOWN_SEND: u32 = 2;

// Queue mask to select vrings: bits 0 and 1 are set, i.e. the rx and tx
// vrings that `handle_event` reads from `vrings[0]` and `vrings[1]`.
const QUEUE_MASK: u64 = 0b11;
80 
/// Result type used throughout this module, with [`Error`] as the error type.
pub(crate) type Result<T> = std::result::Result<T, Error>;

/// Custom error types
///
/// Several variants wrap the underlying error value. The `#[error]` messages
/// below contain no format placeholders, so the `Display` output is only the
/// fixed string; the wrapped value is reachable through pattern matching or
/// the `Debug` output.
#[derive(Debug, ThisError)]
pub(crate) enum Error {
    #[error("Failed to handle event other than EPOLLIN event")]
    HandleEventNotEpollIn,
    #[error("Failed to handle unknown event")]
    HandleUnknownEvent,
    #[error("Failed to accept new local socket connection")]
    UnixAccept(std::io::Error),
    #[error("Failed to bind a unix stream")]
    UnixBind(std::io::Error),
    #[error("Failed to create an epoll fd")]
    EpollFdCreate(std::io::Error),
    #[error("Failed to add to epoll")]
    EpollAdd(std::io::Error),
    #[error("Failed to modify evset associated with epoll")]
    EpollModify(std::io::Error),
    #[error("Failed to read from unix stream")]
    UnixRead(std::io::Error),
    #[error("Failed to convert byte array to string")]
    ConvertFromUtf8(std::str::Utf8Error),
    #[error("Invalid vsock connection request from host")]
    InvalidPortRequest,
    #[error("Unable to convert string to integer")]
    ParseInteger(std::num::ParseIntError),
    // Boxed because the variant holds another `Error`, which would otherwise
    // make the enum infinitely sized.
    #[error("Error reading stream port")]
    ReadStreamPort(Box<Error>),
    #[error("Failed to de-register fd from epoll")]
    EpollRemove(std::io::Error),
    #[error("No memory configured")]
    NoMemoryConfigured,
    #[error("Unable to iterate queue")]
    IterateQueue,
    #[error("No rx request available")]
    NoRequestRx,
    #[error("Packet missing data buffer")]
    PktBufMissing,
    #[error("Failed to connect to unix socket")]
    UnixConnect(std::io::Error),
    #[error("Unable to write to unix stream")]
    UnixWrite,
    #[error("Unable to push data to local tx buffer")]
    LocalTxBufFull,
    #[error("Unable to flush data from local tx buffer")]
    LocalTxBufFlush(std::io::Error),
    // NOTE(review): "inititated" is a typo for "initiated"; the message is
    // runtime text, so it is left untouched here.
    #[error("No free local port available for new host inititated connection")]
    NoFreeLocalPort,
    #[error("Backend rx queue is empty")]
    EmptyBackendRxQ,
    #[error("Failed to create an EventFd")]
    EventFdCreate(std::io::Error),
    #[error("Raw vsock packets queue is empty")]
    EmptyRawPktsQueue,
    #[error("CID already in use by another vsock device")]
    CidAlreadyInUse,
}
139 
140 impl std::convert::From<Error> for std::io::Error {
from(e: Error) -> Self141     fn from(e: Error) -> Self {
142         std::io::Error::new(io::ErrorKind::Other, e)
143     }
144 }
145 
/// This structure is the public API through which an external program
/// is allowed to configure the backend.
#[derive(Debug, Clone)]
pub(crate) struct VsockConfig {
    /// CID of the guest.
    guest_cid: u64,
    /// Path of the unix domain socket listening to requests from the guest.
    socket: String,
    /// Path of the unix domain socket listening to requests from the host
    /// side application.
    uds_path: String,
    /// Tx buffer size handed to the backend (unit not visible in this file).
    tx_buffer_size: u32,
    /// Group names handed to the backend.
    groups: Vec<String>,
}

impl VsockConfig {
    /// Build a `VsockConfig` holding the parameters that are later fed into
    /// the vsock-backend server.
    pub fn new(
        guest_cid: u64,
        socket: String,
        uds_path: String,
        tx_buffer_size: u32,
        groups: Vec<String>,
    ) -> Self {
        VsockConfig {
            guest_cid,
            socket,
            uds_path,
            tx_buffer_size,
            groups,
        }
    }

    /// Return the guest's current CID.
    pub fn get_guest_cid(&self) -> u64 {
        self.guest_cid
    }

    /// Return an owned copy of the path of the unix domain socket which is
    /// listening to requests from the host side application.
    pub fn get_uds_path(&self) -> String {
        self.uds_path.clone()
    }

    /// Return an owned copy of the path of the unix domain socket which is
    /// listening to requests from the guest.
    pub fn get_socket_path(&self) -> String {
        self.socket.clone()
    }

    /// Return the configured tx buffer size.
    pub fn get_tx_buffer_size(&self) -> u32 {
        self.tx_buffer_size
    }

    /// Return an owned copy of the configured group names.
    pub fn get_groups(&self) -> Vec<String> {
        self.groups.to_vec()
    }
}
201 
/// Key made of a local port and a peer port, used to look up the
/// connection associated with that pair.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ConnMapKey {
    /// Port on the local side of the connection.
    local_port: u32,
    /// Port on the peer side of the connection.
    peer_port: u32,
}

impl ConnMapKey {
    /// Build a key from the given local and peer ports.
    pub fn new(local_port: u32, peer_port: u32) -> Self {
        ConnMapKey {
            local_port,
            peer_port,
        }
    }
}
218 
/// Virtio Vsock Configuration
///
/// This is the structure whose bytes `get_config` serves; it consists of a
/// single 64-bit little-endian guest CID.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
#[repr(C)]
struct VirtioVsockConfig {
    /// CID of the guest, stored as a little-endian 64-bit value.
    pub guest_cid: Le64,
}

// SAFETY: The layout of the structure is fixed (`#[repr(C)]` with a single
// `Le64` field) and can be initialized by reading its content from byte array.
unsafe impl ByteValued for VirtioVsockConfig {}
229 
230 pub(crate) struct VhostUserVsockBackend {
231     config: VirtioVsockConfig,
232     pub threads: Vec<Mutex<VhostUserVsockThread>>,
233     queues_per_thread: Vec<u64>,
234     pub exit_event: EventFd,
235 }
236 
237 impl VhostUserVsockBackend {
new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self>238     pub fn new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self> {
239         let thread = Mutex::new(VhostUserVsockThread::new(
240             config.get_uds_path(),
241             config.get_guest_cid(),
242             config.get_tx_buffer_size(),
243             config.get_groups(),
244             cid_map,
245         )?);
246         let queues_per_thread = vec![QUEUE_MASK];
247 
248         Ok(Self {
249             config: VirtioVsockConfig {
250                 guest_cid: From::from(config.get_guest_cid()),
251             },
252             threads: vec![thread],
253             queues_per_thread,
254             exit_event: EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?,
255         })
256     }
257 }
258 
259 impl VhostUserBackend for VhostUserVsockBackend {
260     type Vring = VringRwLock;
261     type Bitmap = ();
num_queues(&self) -> usize262     fn num_queues(&self) -> usize {
263         NUM_QUEUES
264     }
265 
max_queue_size(&self) -> usize266     fn max_queue_size(&self) -> usize {
267         QUEUE_SIZE
268     }
269 
features(&self) -> u64270     fn features(&self) -> u64 {
271         1 << VIRTIO_F_VERSION_1
272             | 1 << VIRTIO_F_NOTIFY_ON_EMPTY
273             | 1 << VIRTIO_RING_F_EVENT_IDX
274             | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
275     }
276 
protocol_features(&self) -> VhostUserProtocolFeatures277     fn protocol_features(&self) -> VhostUserProtocolFeatures {
278         VhostUserProtocolFeatures::CONFIG
279     }
280 
set_event_idx(&self, enabled: bool)281     fn set_event_idx(&self, enabled: bool) {
282         for thread in self.threads.iter() {
283             thread.lock().unwrap().event_idx = enabled;
284         }
285     }
286 
update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()>287     fn update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> {
288         for thread in self.threads.iter() {
289             thread.lock().unwrap().mem = Some(atomic_mem.clone());
290         }
291         Ok(())
292     }
293 
handle_event( &self, device_event: u16, evset: EventSet, vrings: &[VringRwLock], thread_id: usize, ) -> IoResult<()>294     fn handle_event(
295         &self,
296         device_event: u16,
297         evset: EventSet,
298         vrings: &[VringRwLock],
299         thread_id: usize,
300     ) -> IoResult<()> {
301         let vring_rx = &vrings[0];
302         let vring_tx = &vrings[1];
303 
304         if evset != EventSet::IN {
305             return Err(Error::HandleEventNotEpollIn.into());
306         }
307 
308         let mut thread = self.threads[thread_id].lock().unwrap();
309         let evt_idx = thread.event_idx;
310 
311         match device_event {
312             RX_QUEUE_EVENT => {}
313             TX_QUEUE_EVENT => {
314                 thread.process_tx(vring_tx, evt_idx)?;
315             }
316             EVT_QUEUE_EVENT => {
317                 warn!("Received an unexpected EVT_QUEUE_EVENT");
318             }
319             BACKEND_EVENT => {
320                 thread.process_backend_evt(evset);
321                 if let Err(e) = thread.process_tx(vring_tx, evt_idx) {
322                     match e {
323                         Error::NoMemoryConfigured => {
324                             warn!("Received a backend event before vring initialization")
325                         }
326                         _ => return Err(e.into()),
327                     }
328                 }
329             }
330             SIBLING_VM_EVENT => {
331                 let _ = thread.sibling_event_fd.read();
332                 thread.process_raw_pkts(vring_rx, evt_idx)?;
333                 return Ok(());
334             }
335             _ => {
336                 return Err(Error::HandleUnknownEvent.into());
337             }
338         }
339 
340         if device_event != EVT_QUEUE_EVENT {
341             thread.process_rx(vring_rx, evt_idx)?;
342         }
343 
344         Ok(())
345     }
346 
get_config(&self, offset: u32, size: u32) -> Vec<u8>347     fn get_config(&self, offset: u32, size: u32) -> Vec<u8> {
348         let offset = offset as usize;
349         let size = size as usize;
350 
351         let buf = self.config.as_slice();
352 
353         if offset + size > buf.len() {
354             return Vec::new();
355         }
356 
357         buf[offset..offset + size].to_vec()
358     }
359 
queues_per_thread(&self) -> Vec<u64>360     fn queues_per_thread(&self) -> Vec<u64> {
361         self.queues_per_thread.clone()
362     }
363 
exit_event(&self, _thread_index: usize) -> Option<EventFd>364     fn exit_event(&self, _thread_index: usize) -> Option<EventFd> {
365         self.exit_event.try_clone().ok()
366     }
367 }
368 
#[cfg(test)]
mod tests {
    use super::*;
    use std::convert::TryInto;
    use tempfile::tempdir;
    use vhost_user_backend::VringT;
    use vm_memory::GuestAddress;

    const CONN_TX_BUF_SIZE: u32 = 64 * 1024;

    /// Happy path: the backend is created, reports its static properties and
    /// handles every known event successfully.
    #[test]
    fn test_vsock_backend() {
        const CID: u64 = 3;

        let groups_list: Vec<String> = vec![String::from("default")];

        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let vhost_socket_path = test_dir
            .path()
            .join("test_vsock_backend.socket")
            .display()
            .to_string();
        let vsock_socket_path = test_dir
            .path()
            .join("test_vsock_backend.vsock")
            .display()
            .to_string();

        let config = VsockConfig::new(
            CID,
            vhost_socket_path.clone(),
            vsock_socket_path.clone(),
            CONN_TX_BUF_SIZE,
            groups_list,
        );

        let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

        let backend = VhostUserVsockBackend::new(config, cid_map);

        assert!(backend.is_ok());
        let backend = backend.unwrap();

        assert_eq!(backend.num_queues(), NUM_QUEUES);
        assert_eq!(backend.max_queue_size(), QUEUE_SIZE);
        assert_ne!(backend.features(), 0);
        assert!(!backend.protocol_features().is_empty());
        backend.set_event_idx(false);

        let mem = GuestMemoryAtomic::new(
            GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(),
        );
        let vrings = [
            VringRwLock::new(mem.clone(), 0x1000).unwrap(),
            VringRwLock::new(mem.clone(), 0x2000).unwrap(),
        ];
        vrings[0].set_queue_info(0x100, 0x200, 0x300).unwrap();
        vrings[0].set_queue_ready(true);
        vrings[1].set_queue_info(0x1100, 0x1200, 0x1300).unwrap();
        vrings[1].set_queue_ready(true);

        assert!(backend.update_memory(mem).is_ok());

        let queues_per_thread = backend.queues_per_thread();
        assert_eq!(queues_per_thread.len(), 1);
        assert_eq!(queues_per_thread[0], 0b11);

        let config = backend.get_config(0, 8);
        assert_eq!(config.len(), 8);
        let cid = u64::from_le_bytes(config.try_into().unwrap());
        assert_eq!(cid, CID);

        let exit = backend.exit_event(0);
        assert!(exit.is_some());
        exit.unwrap().write(1).unwrap();

        // `handle_event` returns `IoResult<()>`, so success is the only thing
        // to check for each event.
        let ret = backend.handle_event(RX_QUEUE_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());

        let ret = backend.handle_event(TX_QUEUE_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());

        let ret = backend.handle_event(EVT_QUEUE_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());

        let ret = backend.handle_event(BACKEND_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());

        // cleanup
        let _ = std::fs::remove_file(vhost_socket_path);
        let _ = std::fs::remove_file(vsock_socket_path);

        test_dir.close().unwrap();
    }

    /// Failure paths: unusable socket paths, out-of-range config reads and
    /// rejected events.
    #[test]
    fn test_vsock_backend_failures() {
        const CID: u64 = 3;

        let groups: Vec<String> = vec![String::from("default")];

        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let vhost_socket_path = test_dir
            .path()
            .join("test_vsock_backend_failures.socket")
            .display()
            .to_string();
        let vsock_socket_path = test_dir
            .path()
            .join("test_vsock_backend_failures.vsock")
            .display()
            .to_string();

        let config = VsockConfig::new(
            CID,
            "/sys/not_allowed.socket".to_string(),
            "/sys/not_allowed.vsock".to_string(),
            CONN_TX_BUF_SIZE,
            groups.clone(),
        );

        let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

        let backend = VhostUserVsockBackend::new(config, cid_map.clone());
        assert!(backend.is_err());

        let config = VsockConfig::new(
            CID,
            vhost_socket_path.clone(),
            vsock_socket_path.clone(),
            CONN_TX_BUF_SIZE,
            groups,
        );

        let backend = VhostUserVsockBackend::new(config, cid_map).unwrap();
        let mem = GuestMemoryAtomic::new(
            GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(),
        );
        let vrings = [
            VringRwLock::new(mem.clone(), 0x1000).unwrap(),
            VringRwLock::new(mem.clone(), 0x2000).unwrap(),
        ];

        backend.update_memory(mem).unwrap();

        // reading out of the config space, expecting empty config
        let config = backend.get_config(2, 8);
        assert_eq!(config.len(), 0);

        assert_eq!(
            backend
                .handle_event(RX_QUEUE_EVENT, EventSet::OUT, &vrings, 0)
                .unwrap_err()
                .to_string(),
            Error::HandleEventNotEpollIn.to_string()
        );
        assert_eq!(
            backend
                .handle_event(SIBLING_VM_EVENT + 1, EventSet::IN, &vrings, 0)
                .unwrap_err()
                .to_string(),
            Error::HandleUnknownEvent.to_string()
        );

        // cleanup
        let _ = std::fs::remove_file(vhost_socket_path);
        let _ = std::fs::remove_file(vsock_socket_path);

        test_dir.close().unwrap();
    }
}
546