1 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause 2 3 use std::{ 4 collections::{HashMap, HashSet}, 5 io::{self, Result as IoResult}, 6 sync::{Arc, Mutex, RwLock}, 7 u16, u32, u64, u8, 8 }; 9 10 use log::warn; 11 use thiserror::Error as ThisError; 12 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; 13 use vhost_user_backend::{VhostUserBackend, VringRwLock}; 14 use virtio_bindings::bindings::{ 15 virtio_config::VIRTIO_F_NOTIFY_ON_EMPTY, virtio_config::VIRTIO_F_VERSION_1, 16 virtio_ring::VIRTIO_RING_F_EVENT_IDX, 17 }; 18 use vm_memory::{ByteValued, GuestMemoryAtomic, GuestMemoryMmap, Le64}; 19 use vmm_sys_util::{ 20 epoll::EventSet, 21 eventfd::{EventFd, EFD_NONBLOCK}, 22 }; 23 24 use crate::thread_backend::RawPktsQ; 25 use crate::vhu_vsock_thread::*; 26 27 pub(crate) type CidMap = 28 HashMap<u64, (Arc<RwLock<RawPktsQ>>, Arc<RwLock<HashSet<String>>>, EventFd)>; 29 30 const NUM_QUEUES: usize = 3; 31 const QUEUE_SIZE: usize = 256; 32 33 // New descriptors pending on the rx queue 34 const RX_QUEUE_EVENT: u16 = 0; 35 // New descriptors are pending on the tx queue. 36 const TX_QUEUE_EVENT: u16 = 1; 37 // New descriptors are pending on the event queue. 38 const EVT_QUEUE_EVENT: u16 = 2; 39 40 /// Notification coming from the backend. 41 /// Event range [0...num_queues] is reserved for queues and exit event. 42 /// So NUM_QUEUES + 1 is used. 43 pub(crate) const BACKEND_EVENT: u16 = (NUM_QUEUES + 1) as u16; 44 45 /// Notification coming from the sibling VM. 46 pub(crate) const SIBLING_VM_EVENT: u16 = BACKEND_EVENT + 1; 47 48 /// CID of the host 49 pub(crate) const VSOCK_HOST_CID: u64 = 2; 50 51 /// Connection oriented packet 52 pub(crate) const VSOCK_TYPE_STREAM: u16 = 1; 53 54 /// Vsock packet operation ID 55 56 /// Connection request 57 pub(crate) const VSOCK_OP_REQUEST: u16 = 1; 58 /// Connection response 59 pub(crate) const VSOCK_OP_RESPONSE: u16 = 2; 60 /// Connection reset 61 pub(crate) const VSOCK_OP_RST: u16 = 3; 62 /// Shutdown connection 63 pub(crate) const VSOCK_OP_SHUTDOWN: u16 = 4; 64 /// Data read/write 65 pub(crate) const VSOCK_OP_RW: u16 = 5; 66 /// Flow control credit update 67 pub(crate) const VSOCK_OP_CREDIT_UPDATE: u16 = 6; 68 /// Flow control credit request 69 pub(crate) const VSOCK_OP_CREDIT_REQUEST: u16 = 7; 70 71 /// Vsock packet flags 72 73 /// VSOCK_OP_SHUTDOWN: Packet sender will receive no more data 74 pub(crate) const VSOCK_FLAGS_SHUTDOWN_RCV: u32 = 1; 75 /// VSOCK_OP_SHUTDOWN: Packet sender will send no more data 76 pub(crate) const VSOCK_FLAGS_SHUTDOWN_SEND: u32 = 2; 77 78 // Queue mask to select vrings. 79 const QUEUE_MASK: u64 = 0b11; 80 81 pub(crate) type Result<T> = std::result::Result<T, Error>; 82 83 /// Custom error types 84 #[derive(Debug, ThisError)] 85 pub(crate) enum Error { 86 #[error("Failed to handle event other than EPOLLIN event")] 87 HandleEventNotEpollIn, 88 #[error("Failed to handle unknown event")] 89 HandleUnknownEvent, 90 #[error("Failed to accept new local socket connection")] 91 UnixAccept(std::io::Error), 92 #[error("Failed to bind a unix stream")] 93 UnixBind(std::io::Error), 94 #[error("Failed to create an epoll fd")] 95 EpollFdCreate(std::io::Error), 96 #[error("Failed to add to epoll")] 97 EpollAdd(std::io::Error), 98 #[error("Failed to modify evset associated with epoll")] 99 EpollModify(std::io::Error), 100 #[error("Failed to read from unix stream")] 101 UnixRead(std::io::Error), 102 #[error("Failed to convert byte array to string")] 103 ConvertFromUtf8(std::str::Utf8Error), 104 #[error("Invalid vsock connection request from host")] 105 InvalidPortRequest, 106 #[error("Unable to convert string to integer")] 107 ParseInteger(std::num::ParseIntError), 108 #[error("Error reading stream port")] 109 ReadStreamPort(Box<Error>), 110 #[error("Failed to de-register fd from epoll")] 111 EpollRemove(std::io::Error), 112 #[error("No memory configured")] 113 NoMemoryConfigured, 114 #[error("Unable to iterate queue")] 115 IterateQueue, 116 #[error("No rx request available")] 117 NoRequestRx, 118 #[error("Packet missing data buffer")] 119 PktBufMissing, 120 #[error("Failed to connect to unix socket")] 121 UnixConnect(std::io::Error), 122 #[error("Unable to write to unix stream")] 123 UnixWrite, 124 #[error("Unable to push data to local tx buffer")] 125 LocalTxBufFull, 126 #[error("Unable to flush data from local tx buffer")] 127 LocalTxBufFlush(std::io::Error), 128 #[error("No free local port available for new host inititated connection")] 129 NoFreeLocalPort, 130 #[error("Backend rx queue is empty")] 131 EmptyBackendRxQ, 132 #[error("Failed to create an EventFd")] 133 EventFdCreate(std::io::Error), 134 #[error("Raw vsock packets queue is empty")] 135 EmptyRawPktsQueue, 136 #[error("CID already in use by another vsock device")] 137 CidAlreadyInUse, 138 } 139 140 impl std::convert::From<Error> for std::io::Error { from(e: Error) -> Self141 fn from(e: Error) -> Self { 142 std::io::Error::new(io::ErrorKind::Other, e) 143 } 144 } 145 146 #[derive(Debug, Clone)] 147 /// This structure is the public API through which an external program 148 /// is allowed to configure the backend. 149 pub(crate) struct VsockConfig { 150 guest_cid: u64, 151 socket: String, 152 uds_path: String, 153 tx_buffer_size: u32, 154 groups: Vec<String>, 155 } 156 157 impl VsockConfig { 158 /// Create a new instance of the VsockConfig struct, containing the 159 /// parameters to be fed into the vsock-backend server. new( guest_cid: u64, socket: String, uds_path: String, tx_buffer_size: u32, groups: Vec<String>, ) -> Self160 pub fn new( 161 guest_cid: u64, 162 socket: String, 163 uds_path: String, 164 tx_buffer_size: u32, 165 groups: Vec<String>, 166 ) -> Self { 167 Self { 168 guest_cid, 169 socket, 170 uds_path, 171 tx_buffer_size, 172 groups, 173 } 174 } 175 176 /// Return the guest's current CID. get_guest_cid(&self) -> u64177 pub fn get_guest_cid(&self) -> u64 { 178 self.guest_cid 179 } 180 181 /// Return the path of the unix domain socket which is listening to 182 /// requests from the host side application. get_uds_path(&self) -> String183 pub fn get_uds_path(&self) -> String { 184 String::from(&self.uds_path) 185 } 186 187 /// Return the path of the unix domain socket which is listening to 188 /// requests from the guest. get_socket_path(&self) -> String189 pub fn get_socket_path(&self) -> String { 190 String::from(&self.socket) 191 } 192 get_tx_buffer_size(&self) -> u32193 pub fn get_tx_buffer_size(&self) -> u32 { 194 self.tx_buffer_size 195 } 196 get_groups(&self) -> Vec<String>197 pub fn get_groups(&self) -> Vec<String> { 198 self.groups.clone() 199 } 200 } 201 202 /// A local port and peer port pair used to retrieve 203 /// the corresponding connection. 204 #[derive(Hash, PartialEq, Eq, Debug, Clone)] 205 pub(crate) struct ConnMapKey { 206 local_port: u32, 207 peer_port: u32, 208 } 209 210 impl ConnMapKey { new(local_port: u32, peer_port: u32) -> Self211 pub fn new(local_port: u32, peer_port: u32) -> Self { 212 Self { 213 local_port, 214 peer_port, 215 } 216 } 217 } 218 219 /// Virtio Vsock Configuration 220 #[derive(Copy, Clone, Debug, Default, PartialEq)] 221 #[repr(C)] 222 struct VirtioVsockConfig { 223 pub guest_cid: Le64, 224 } 225 226 // SAFETY: The layout of the structure is fixed and can be initialized by 227 // reading its content from byte array. 228 unsafe impl ByteValued for VirtioVsockConfig {} 229 230 pub(crate) struct VhostUserVsockBackend { 231 config: VirtioVsockConfig, 232 pub threads: Vec<Mutex<VhostUserVsockThread>>, 233 queues_per_thread: Vec<u64>, 234 pub exit_event: EventFd, 235 } 236 237 impl VhostUserVsockBackend { new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self>238 pub fn new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self> { 239 let thread = Mutex::new(VhostUserVsockThread::new( 240 config.get_uds_path(), 241 config.get_guest_cid(), 242 config.get_tx_buffer_size(), 243 config.get_groups(), 244 cid_map, 245 )?); 246 let queues_per_thread = vec![QUEUE_MASK]; 247 248 Ok(Self { 249 config: VirtioVsockConfig { 250 guest_cid: From::from(config.get_guest_cid()), 251 }, 252 threads: vec![thread], 253 queues_per_thread, 254 exit_event: EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?, 255 }) 256 } 257 } 258 259 impl VhostUserBackend for VhostUserVsockBackend { 260 type Vring = VringRwLock; 261 type Bitmap = (); num_queues(&self) -> usize262 fn num_queues(&self) -> usize { 263 NUM_QUEUES 264 } 265 max_queue_size(&self) -> usize266 fn max_queue_size(&self) -> usize { 267 QUEUE_SIZE 268 } 269 features(&self) -> u64270 fn features(&self) -> u64 { 271 1 << VIRTIO_F_VERSION_1 272 | 1 << VIRTIO_F_NOTIFY_ON_EMPTY 273 | 1 << VIRTIO_RING_F_EVENT_IDX 274 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() 275 } 276 protocol_features(&self) -> VhostUserProtocolFeatures277 fn protocol_features(&self) -> VhostUserProtocolFeatures { 278 VhostUserProtocolFeatures::CONFIG 279 } 280 set_event_idx(&self, enabled: bool)281 fn set_event_idx(&self, enabled: bool) { 282 for thread in self.threads.iter() { 283 thread.lock().unwrap().event_idx = enabled; 284 } 285 } 286 update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()>287 fn update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> { 288 for thread in self.threads.iter() { 289 thread.lock().unwrap().mem = Some(atomic_mem.clone()); 290 } 291 Ok(()) 292 } 293 handle_event( &self, device_event: u16, evset: EventSet, vrings: &[VringRwLock], thread_id: usize, ) -> IoResult<()>294 fn handle_event( 295 &self, 296 device_event: u16, 297 evset: EventSet, 298 vrings: &[VringRwLock], 299 thread_id: usize, 300 ) -> IoResult<()> { 301 let vring_rx = &vrings[0]; 302 let vring_tx = &vrings[1]; 303 304 if evset != EventSet::IN { 305 return Err(Error::HandleEventNotEpollIn.into()); 306 } 307 308 let mut thread = self.threads[thread_id].lock().unwrap(); 309 let evt_idx = thread.event_idx; 310 311 match device_event { 312 RX_QUEUE_EVENT => {} 313 TX_QUEUE_EVENT => { 314 thread.process_tx(vring_tx, evt_idx)?; 315 } 316 EVT_QUEUE_EVENT => { 317 warn!("Received an unexpected EVT_QUEUE_EVENT"); 318 } 319 BACKEND_EVENT => { 320 thread.process_backend_evt(evset); 321 if let Err(e) = thread.process_tx(vring_tx, evt_idx) { 322 match e { 323 Error::NoMemoryConfigured => { 324 warn!("Received a backend event before vring initialization") 325 } 326 _ => return Err(e.into()), 327 } 328 } 329 } 330 SIBLING_VM_EVENT => { 331 let _ = thread.sibling_event_fd.read(); 332 thread.process_raw_pkts(vring_rx, evt_idx)?; 333 return Ok(()); 334 } 335 _ => { 336 return Err(Error::HandleUnknownEvent.into()); 337 } 338 } 339 340 if device_event != EVT_QUEUE_EVENT { 341 thread.process_rx(vring_rx, evt_idx)?; 342 } 343 344 Ok(()) 345 } 346 get_config(&self, offset: u32, size: u32) -> Vec<u8>347 fn get_config(&self, offset: u32, size: u32) -> Vec<u8> { 348 let offset = offset as usize; 349 let size = size as usize; 350 351 let buf = self.config.as_slice(); 352 353 if offset + size > buf.len() { 354 return Vec::new(); 355 } 356 357 buf[offset..offset + size].to_vec() 358 } 359 queues_per_thread(&self) -> Vec<u64>360 fn queues_per_thread(&self) -> Vec<u64> { 361 self.queues_per_thread.clone() 362 } 363 exit_event(&self, _thread_index: usize) -> Option<EventFd>364 fn exit_event(&self, _thread_index: usize) -> Option<EventFd> { 365 self.exit_event.try_clone().ok() 366 } 367 } 368 369 #[cfg(test)] 370 mod tests { 371 use super::*; 372 use std::convert::TryInto; 373 use tempfile::tempdir; 374 use vhost_user_backend::VringT; 375 use vm_memory::GuestAddress; 376 377 const CONN_TX_BUF_SIZE: u32 = 64 * 1024; 378 379 #[test] test_vsock_backend()380 fn test_vsock_backend() { 381 const CID: u64 = 3; 382 383 let groups_list: Vec<String> = vec![String::from("default")]; 384 385 let test_dir = tempdir().expect("Could not create a temp test directory."); 386 387 let vhost_socket_path = test_dir 388 .path() 389 .join("test_vsock_backend.socket") 390 .display() 391 .to_string(); 392 let vsock_socket_path = test_dir 393 .path() 394 .join("test_vsock_backend.vsock") 395 .display() 396 .to_string(); 397 398 let config = VsockConfig::new( 399 CID, 400 vhost_socket_path.to_string(), 401 vsock_socket_path.to_string(), 402 CONN_TX_BUF_SIZE, 403 groups_list, 404 ); 405 406 let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new())); 407 408 let backend = VhostUserVsockBackend::new(config, cid_map); 409 410 assert!(backend.is_ok()); 411 let backend = backend.unwrap(); 412 413 assert_eq!(backend.num_queues(), NUM_QUEUES); 414 assert_eq!(backend.max_queue_size(), QUEUE_SIZE); 415 assert_ne!(backend.features(), 0); 416 assert!(!backend.protocol_features().is_empty()); 417 backend.set_event_idx(false); 418 419 let mem = GuestMemoryAtomic::new( 420 GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(), 421 ); 422 let vrings = [ 423 VringRwLock::new(mem.clone(), 0x1000).unwrap(), 424 VringRwLock::new(mem.clone(), 0x2000).unwrap(), 425 ]; 426 vrings[0].set_queue_info(0x100, 0x200, 0x300).unwrap(); 427 vrings[0].set_queue_ready(true); 428 vrings[1].set_queue_info(0x1100, 0x1200, 0x1300).unwrap(); 429 vrings[1].set_queue_ready(true); 430 431 assert!(backend.update_memory(mem).is_ok()); 432 433 let queues_per_thread = backend.queues_per_thread(); 434 assert_eq!(queues_per_thread.len(), 1); 435 assert_eq!(queues_per_thread[0], 0b11); 436 437 let config = backend.get_config(0, 8); 438 assert_eq!(config.len(), 8); 439 let cid = u64::from_le_bytes(config.try_into().unwrap()); 440 assert_eq!(cid, CID); 441 442 let exit = backend.exit_event(0); 443 assert!(exit.is_some()); 444 exit.unwrap().write(1).unwrap(); 445 446 let ret = backend.handle_event(RX_QUEUE_EVENT, EventSet::IN, &vrings, 0); 447 assert!(ret.is_ok()); 448 assert!(!ret.unwrap()); 449 450 let ret = backend.handle_event(TX_QUEUE_EVENT, EventSet::IN, &vrings, 0); 451 assert!(ret.is_ok()); 452 assert!(!ret.unwrap()); 453 454 let ret = backend.handle_event(EVT_QUEUE_EVENT, EventSet::IN, &vrings, 0); 455 assert!(ret.is_ok()); 456 assert!(!ret.unwrap()); 457 458 let ret = backend.handle_event(BACKEND_EVENT, EventSet::IN, &vrings, 0); 459 assert!(ret.is_ok()); 460 assert!(!ret.unwrap()); 461 462 // cleanup 463 let _ = std::fs::remove_file(vhost_socket_path); 464 let _ = std::fs::remove_file(vsock_socket_path); 465 466 test_dir.close().unwrap(); 467 } 468 469 #[test] test_vsock_backend_failures()470 fn test_vsock_backend_failures() { 471 const CID: u64 = 3; 472 473 let groups: Vec<String> = vec![String::from("default")]; 474 475 let test_dir = tempdir().expect("Could not create a temp test directory."); 476 477 let vhost_socket_path = test_dir 478 .path() 479 .join("test_vsock_backend_failures.socket") 480 .display() 481 .to_string(); 482 let vsock_socket_path = test_dir 483 .path() 484 .join("test_vsock_backend_failures.vsock") 485 .display() 486 .to_string(); 487 488 let config = VsockConfig::new( 489 CID, 490 "/sys/not_allowed.socket".to_string(), 491 "/sys/not_allowed.vsock".to_string(), 492 CONN_TX_BUF_SIZE, 493 groups.clone(), 494 ); 495 496 let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new())); 497 498 let backend = VhostUserVsockBackend::new(config, cid_map.clone()); 499 assert!(backend.is_err()); 500 501 let config = VsockConfig::new( 502 CID, 503 vhost_socket_path.to_string(), 504 vsock_socket_path.to_string(), 505 CONN_TX_BUF_SIZE, 506 groups, 507 ); 508 509 let backend = VhostUserVsockBackend::new(config, cid_map).unwrap(); 510 let mem = GuestMemoryAtomic::new( 511 GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(), 512 ); 513 let vrings = [ 514 VringRwLock::new(mem.clone(), 0x1000).unwrap(), 515 VringRwLock::new(mem.clone(), 0x2000).unwrap(), 516 ]; 517 518 backend.update_memory(mem).unwrap(); 519 520 // reading out of the config space, expecting empty config 521 let config = backend.get_config(2, 8); 522 assert_eq!(config.len(), 0); 523 524 assert_eq!( 525 backend 526 .handle_event(RX_QUEUE_EVENT, EventSet::OUT, &vrings, 0) 527 .unwrap_err() 528 .to_string(), 529 Error::HandleEventNotEpollIn.to_string() 530 ); 531 assert_eq!( 532 backend 533 .handle_event(SIBLING_VM_EVENT + 1, EventSet::IN, &vrings, 0) 534 .unwrap_err() 535 .to_string(), 536 Error::HandleUnknownEvent.to_string() 537 ); 538 539 // cleanup 540 let _ = std::fs::remove_file(vhost_socket_path); 541 let _ = std::fs::remove_file(vsock_socket_path); 542 543 test_dir.close().unwrap(); 544 } 545 } 546